net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.IScanner;
  18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  23
  24   /* APIs are
  25    - getNextToken() which returns the current type of the token
  26      (this value is not memorized by the scanner)
  27    - getCurrentTokenSource() which provides the token's "REAL" source
  28      (aka all unicode have been transformed into a correct char)
  29    - sourceStart gives the position into the stream
  30    - currentPosition-1 gives the sourceEnd position into the stream
  31   */
  32
  33   // 1.4 feature: flags related to the assert keyword
  34   private boolean assertMode;
  35   public boolean useAssertAsAnIndentifier = false;
  36   // flag indicating whether the processed source contains occurrences of the keyword assert
  37   public boolean containsAssertKeyword = false;
  38
       // when true, getNextToken() calls pushLineSeparator() for each '\r' or '\n' met while skipping HTML
  39   public boolean recordLineSeparator;
       // false while skipping HTML; getNextToken() sets it to true once it has read "<?" followed by
       // whitespace, or "<?php" (letters in either case)
  40   public boolean phpMode = false;
  41
       // last character read from source
  42   public char currentCharacter;
       // start of the current token (see getCurrentTokenStartPosition())
  43   public int startPosition;
       // index of the next character to read; currentPosition - 1 is the end of the current token
  44   public int currentPosition;
  45   public int initialPosition, eofPosition;
  46   // after eofPosition, EOF tokens are generated instead of real tokens from the source
  47
  48   public boolean tokenizeComments;
       // when true, getNextToken() returns the skipped HTML as a TokenNameHTML token
  49   public boolean tokenizeWhiteSpace;
  50
  51   //source should be viewed as a window (aka a part)
  52   //of an entire very large stream
  53   public char source[];
  54
  55   //unicode support
  56   public char[] withoutUnicodeBuffer;
  57   public int withoutUnicodePtr;
  58   //when withoutUnicodePtr == 0 ==> no unicode in the current token
  59   public boolean unicodeAsBackSlash = false;
  60
  61   public boolean scanningFloatLiteral = false;
  62
  63   //support for /** comments
  64   //public char[][] comments = new char[10][];
  65   public int[] commentStops = new int[10];
  66   public int[] commentStarts = new int[10];
  67   public int commentPtr = -1; // no comment test with commentPtr value -1
  68
  69   //diet parsing support - jump over some method body when requested
       // getNextToken() calls jumpOverMethodBody() once when this is set, then clears it
  70   public boolean diet = false;
  71
  72   //support for the  poor-line-debuggers ....
  73   //remember the position of the cr/lf
  74   public int[] lineEnds = new int[250];
  75   public int linePtr = -1;
       // reset to false by getNextToken() in PHP mode; presumably "previous char was a CR" - TODO confirm
  76   public boolean wasAcr = false;
  77
       // message keys (the trailing NON-NLS tags mark the literals as not to be externalized)
  78   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  79
  80   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  81   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  82   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  83   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  85   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  86   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  87
  88   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  89   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  90   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  91   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  92
  93   //----------------optimized identifier management------------------
       // one shared single-character array per lowercase ASCII letter
  94   static final char[] charArray_a = new char[] { 'a' },
  95     charArray_b = new char[] { 'b' },
  96     charArray_c = new char[] { 'c' },
  97     charArray_d = new char[] { 'd' },
  98     charArray_e = new char[] { 'e' },
  99     charArray_f = new char[] { 'f' },
 100     charArray_g = new char[] { 'g' },
 101     charArray_h = new char[] { 'h' },
 102     charArray_i = new char[] { 'i' },
 103     charArray_j = new char[] { 'j' },
 104     charArray_k = new char[] { 'k' },
 105     charArray_l = new char[] { 'l' },
 106     charArray_m = new char[] { 'm' },
 107     charArray_n = new char[] { 'n' },
 108     charArray_o = new char[] { 'o' },
 109     charArray_p = new char[] { 'p' },
 110     charArray_q = new char[] { 'q' },
 111     charArray_r = new char[] { 'r' },
 112     charArray_s = new char[] { 's' },
 113     charArray_t = new char[] { 't' },
 114     charArray_u = new char[] { 'u' },
 115     charArray_v = new char[] { 'v' },
 116     charArray_w = new char[] { 'w' },
 117     charArray_x = new char[] { 'x' },
 118     charArray_y = new char[] { 'y' },
 119     charArray_z = new char[] { 'z' };
 120
       // placeholder of six NUL characters; the instance initializer stores it in every slot of charArray_length
 121   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 122   static final int TableSize = 30, InternalTableSize = 6;
 123   //30*6 = 180 entries
 124   public static final int OptimizedLength = 6;
       // table of dimensions [OptimizedLength][TableSize][InternalTableSize][];
       // it is per instance because the static modifier is commented out
 125   public /*static*/
 126   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 127   // support for detecting non-externalized string literals
 128   int currentLineNr = -1;
 129   int previousLineNr = -1;
       // set to null by getNextToken() at a line break met while skipping HTML when recordLineSeparator is false
 130   NLSLine currentLine = null;
 131   List lines = new ArrayList();
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 136   public StringLiteral[] nonNLSStrings = null;
 137   public boolean checkNonExternalizedStringLiterals = true;
 138   public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
       // per-length counters; presumably the next slot to reuse in charArray_length for
       // tokens of length 2..6 - TODO confirm against the optimizedCurrentTokenSourceN methods
 149   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 150
       // bracket kind codes; BracketKinds is the number of kinds
 151   public static final int RoundBracket = 0;
 152   public static final int SquareBracket = 1;
 153   public static final int CurlyBracket = 2;
 154   public static final int BracketKinds = 3;
 155
 156   public static final boolean DEBUG = false;
 157
       /**
        * Creates a scanner by delegating to the two-argument constructor with
        * both tokenizeComments and tokenizeWhiteSpace set to <code>false</code>.
        */
 158   public Scanner() {
 159     this(false, false);
 160   }
       /**
        * Creates a scanner by delegating to the three-argument constructor,
        * passing <code>false</code> as the third argument.
        * NOTE(review): the three-argument constructor is declared outside this
        * part of the file; the meaning of its third parameter is not visible here.
        *
        * @param tokenizeComments forwarded unchanged
        * @param tokenizeWhiteSpace forwarded unchanged
        */
 161   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 162     this(tokenizeComments, tokenizeWhiteSpace, false);
 163   }
 164
 165   /**
 166    * Determines if the specified character is
 167    * permissible as the first character in a PHP identifier
 168    */
 169   public static boolean isPHPIdentifierStart(char ch) {
 170     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 171   }
 172
 173   /**
 174    * Determines if the specified character may be part of a PHP identifier as
 175    * other than the first character
 176    */
 177   public static boolean isPHPIdentifierPart(char ch) {
 178     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 179   }
 180
 181   public final boolean atEnd() {
 182     // This code is not relevant if source is
 183     // Only a part of the real stream input
 184
 185     return source.length == currentPosition;
 186   }
 187   public char[] getCurrentIdentifierSource() {
 188     //return the token REAL source (aka unicodes are precomputed)
 189
 190     char[] result;
 191     //    if (withoutUnicodePtr != 0)
 192     //      //0 is used as a fast test flag so the real first char is in position 1
 193     //      System.arraycopy(
 194     //        withoutUnicodeBuffer,
 195     //        1,
 196     //        result = new char[withoutUnicodePtr],
 197     //        0,
 198     //        withoutUnicodePtr);
 199     //    else {
 200     int length = currentPosition - startPosition;
 201     switch (length) { // see OptimizedLength
 202       case 1 :
 203         return optimizedCurrentTokenSource1();
 204       case 2 :
 205         return optimizedCurrentTokenSource2();
 206       case 3 :
 207         return optimizedCurrentTokenSource3();
 208       case 4 :
 209         return optimizedCurrentTokenSource4();
 210       case 5 :
 211         return optimizedCurrentTokenSource5();
 212       case 6 :
 213         return optimizedCurrentTokenSource6();
 214     }
 215     //no optimization
 216     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 217     //   }
 218     return result;
 219   }
 220   public int getCurrentTokenEndPosition() {
 221     return this.currentPosition - 1;
 222   }
 223
 224   public final char[] getCurrentTokenSource() {
 225     // Return the token REAL source (aka unicodes are precomputed)
 226
 227     char[] result;
 228     //    if (withoutUnicodePtr != 0)
 229     //      // 0 is used as a fast test flag so the real first char is in position 1
 230     //      System.arraycopy(
 231     //        withoutUnicodeBuffer,
 232     //        1,
 233     //        result = new char[withoutUnicodePtr],
 234     //        0,
 235     //        withoutUnicodePtr);
 236     //    else {
 237     int length;
 238     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 239     //    }
 240     return result;
 241   }
 242
 243   public final char[] getCurrentTokenSource(int startPos) {
 244     // Return the token REAL source (aka unicodes are precomputed)
 245
 246     char[] result;
 247     //    if (withoutUnicodePtr != 0)
 248     //      // 0 is used as a fast test flag so the real first char is in position 1
 249     //      System.arraycopy(
 250     //        withoutUnicodeBuffer,
 251     //        1,
 252     //        result = new char[withoutUnicodePtr],
 253     //        0,
 254     //        withoutUnicodePtr);
 255     //    else {
 256     int length;
 257     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 258     //  }
 259     return result;
 260   }
 261
 262   public final char[] getCurrentTokenSourceString() {
 263     //return the token REAL source (aka unicodes are precomputed).
 264     //REMOVE the two " that are at the beginning and the end.
 265
 266     char[] result;
 267     if (withoutUnicodePtr != 0)
 268       //0 is used as a fast test flag so the real first char is in position 1
 269       System.arraycopy(withoutUnicodeBuffer, 2,
 270       //2 is 1 (real start) + 1 (to jump over the ")
 271       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 272     else {
 273       int length;
 274       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 275     }
 276     return result;
 277   }
 278   public int getCurrentTokenStartPosition() {
 279     return this.startPosition;
 280   }
 281
 282   public final char[] getCurrentStringLiteralSource() {
 283     // Return the token REAL source (aka unicodes are precomputed)
 284
 285     char[] result;
 286
 287     int length;
 288     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 289     //    }
 290     return result;
 291   }
 292
 293   /*
 294    * Search the source position corresponding to the end of a given line number
 295    *
 296    * Line numbers are 1-based, and relative to the scanner initialPosition.
 297    * Character positions are 0-based.
 298    *
 299    * In case the given line number is inconsistent, answers -1.
 300    */
 301   public final int getLineEnd(int lineNumber) {
 302
 303     if (lineEnds == null)
 304       return -1;
 305     if (lineNumber >= lineEnds.length)
 306       return -1;
 307     if (lineNumber <= 0)
 308       return -1;
 309
 310     if (lineNumber == lineEnds.length - 1)
 311       return eofPosition;
 312     return lineEnds[lineNumber - 1];
 313     // next line start one character behind the lineEnd of the previous line
 314   }
 315   /**
 316    * Search the source position corresponding to the beginning of a given line number
 317    *
 318    * Line numbers are 1-based, and relative to the scanner initialPosition.
 319    * Character positions are 0-based.
 320    *
 321    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 322    *
 323    * In case the given line number is inconsistent, answers -1.
 324    */
 325   public final int getLineStart(int lineNumber) {
 326
 327     if (lineEnds == null)
 328       return -1;
 329     if (lineNumber >= lineEnds.length)
 330       return -1;
 331     if (lineNumber <= 0)
 332       return -1;
 333
 334     if (lineNumber == 1)
 335       return initialPosition;
 336     return lineEnds[lineNumber - 2] + 1;
 337     // next line start one character behind the lineEnd of the previous line
 338   }
 339   public final boolean getNextChar(char testedChar) {
 340     //BOOLEAN
 341     //handle the case of unicode.
 342     //when a unicode appears then we must use a buffer that holds char internal values
 343     //At the end of this method currentCharacter holds the new visited char
 344     //and currentPosition points right next after it
 345     //Both previous lines are true if the currentCharacter is == to the testedChar
 346     //On false, no side effect has occured.
 347
 348     //ALL getNextChar.... ARE OPTIMIZED COPIES
 349
 350     int temp = currentPosition;
 351     try {
 352       currentCharacter = source[currentPosition++];
 353       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 354       //        && (source[currentPosition] == 'u')) {
 355       //        //-------------unicode traitement ------------
 356       //        int c1, c2, c3, c4;
 357       //        int unicodeSize = 6;
 358       //        currentPosition++;
 359       //        while (source[currentPosition] == 'u') {
 360       //          currentPosition++;
 361       //          unicodeSize++;
 362       //        }
 363       //
 364       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 365       //          || c1 < 0)
 366       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 367       //            || c2 < 0)
 368       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 369       //            || c3 < 0)
 370       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 371       //            || c4 < 0)) {
 372       //          currentPosition = temp;
 373       //          return false;
 374       //        }
 375       //
 376       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 377       //        if (currentCharacter != testedChar) {
 378       //          currentPosition = temp;
 379       //          return false;
 380       //        }
 381       //        unicodeAsBackSlash = currentCharacter == '\\';
 382       //
 383       //        //need the unicode buffer
 384       //        if (withoutUnicodePtr == 0) {
 385       //          //buffer all the entries that have been left aside....
 386       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 387       //          System.arraycopy(
 388       //            source,
 389       //            startPosition,
 390       //            withoutUnicodeBuffer,
 391       //            1,
 392       //            withoutUnicodePtr);
 393       //        }
 394       //        //fill the buffer with the char
 395       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 396       //        return true;
 397       //
 398       //      } //-------------end unicode traitement--------------
 399       //      else {
 400       if (currentCharacter != testedChar) {
 401         currentPosition = temp;
 402         return false;
 403       }
 404       unicodeAsBackSlash = false;
 405       //        if (withoutUnicodePtr != 0)
 406       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 407       return true;
 408       //      }
 409     } catch (IndexOutOfBoundsException e) {
 410       unicodeAsBackSlash = false;
 411       currentPosition = temp;
 412       return false;
 413     }
 414   }
 415   public final int getNextChar(char testedChar1, char testedChar2) {
 416     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 417     //test can be done with (x==0) for the first and (x>0) for the second
 418     //handle the case of unicode.
 419     //when a unicode appears then we must use a buffer that holds char internal values
 420     //At the end of this method currentCharacter holds the new visited char
 421     //and currentPosition points right next after it
 422     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 423     //On false, no side effect has occured.
 424
 425     //ALL getNextChar.... ARE OPTIMIZED COPIES
 426
 427     int temp = currentPosition;
 428     try {
 429       int result;
 430       currentCharacter = source[currentPosition++];
 431       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 432       //        && (source[currentPosition] == 'u')) {
 433       //        //-------------unicode traitement ------------
 434       //        int c1, c2, c3, c4;
 435       //        int unicodeSize = 6;
 436       //        currentPosition++;
 437       //        while (source[currentPosition] == 'u') {
 438       //          currentPosition++;
 439       //          unicodeSize++;
 440       //        }
 441       //
 442       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 443       //          || c1 < 0)
 444       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 445       //            || c2 < 0)
 446       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 447       //            || c3 < 0)
 448       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 449       //            || c4 < 0)) {
 450       //          currentPosition = temp;
 451       //          return 2;
 452       //        }
 453       //
 454       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 455       //        if (currentCharacter == testedChar1)
 456       //          result = 0;
 457       //        else if (currentCharacter == testedChar2)
 458       //          result = 1;
 459       //        else {
 460       //          currentPosition = temp;
 461       //          return -1;
 462       //        }
 463       //
 464       //        //need the unicode buffer
 465       //        if (withoutUnicodePtr == 0) {
 466       //          //buffer all the entries that have been left aside....
 467       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 468       //          System.arraycopy(
 469       //            source,
 470       //            startPosition,
 471       //            withoutUnicodeBuffer,
 472       //            1,
 473       //            withoutUnicodePtr);
 474       //        }
 475       //        //fill the buffer with the char
 476       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 477       //        return result;
 478       //      } //-------------end unicode traitement--------------
 479       //      else {
 480       if (currentCharacter == testedChar1)
 481         result = 0;
 482       else if (currentCharacter == testedChar2)
 483         result = 1;
 484       else {
 485         currentPosition = temp;
 486         return -1;
 487       }
 488
 489       //        if (withoutUnicodePtr != 0)
 490       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 491       return result;
 492       //     }
 493     } catch (IndexOutOfBoundsException e) {
 494       currentPosition = temp;
 495       return -1;
 496     }
 497   }
 498   public final boolean getNextCharAsDigit() {
 499     //BOOLEAN
 500     //handle the case of unicode.
 501     //when a unicode appears then we must use a buffer that holds char internal values
 502     //At the end of this method currentCharacter holds the new visited char
 503     //and currentPosition points right next after it
 504     //Both previous lines are true if the currentCharacter is a digit
 505     //On false, no side effect has occured.
 506
 507     //ALL getNextChar.... ARE OPTIMIZED COPIES
 508
 509     int temp = currentPosition;
 510     try {
 511       currentCharacter = source[currentPosition++];
 512       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 513       //        && (source[currentPosition] == 'u')) {
 514       //        //-------------unicode traitement ------------
 515       //        int c1, c2, c3, c4;
 516       //        int unicodeSize = 6;
 517       //        currentPosition++;
 518       //        while (source[currentPosition] == 'u') {
 519       //          currentPosition++;
 520       //          unicodeSize++;
 521       //        }
 522       //
 523       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 524       //          || c1 < 0)
 525       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 526       //            || c2 < 0)
 527       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 528       //            || c3 < 0)
 529       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 530       //            || c4 < 0)) {
 531       //          currentPosition = temp;
 532       //          return false;
 533       //        }
 534       //
 535       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 536       //        if (!Character.isDigit(currentCharacter)) {
 537       //          currentPosition = temp;
 538       //          return false;
 539       //        }
 540       //
 541       //        //need the unicode buffer
 542       //        if (withoutUnicodePtr == 0) {
 543       //          //buffer all the entries that have been left aside....
 544       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 545       //          System.arraycopy(
 546       //            source,
 547       //            startPosition,
 548       //            withoutUnicodeBuffer,
 549       //            1,
 550       //            withoutUnicodePtr);
 551       //        }
 552       //        //fill the buffer with the char
 553       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 554       //        return true;
 555       //      } //-------------end unicode traitement--------------
 556       //      else {
 557       if (!Character.isDigit(currentCharacter)) {
 558         currentPosition = temp;
 559         return false;
 560       }
 561       //        if (withoutUnicodePtr != 0)
 562       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 563       return true;
 564       //      }
 565     } catch (IndexOutOfBoundsException e) {
 566       currentPosition = temp;
 567       return false;
 568     }
 569   }
 570   public final boolean getNextCharAsDigit(int radix) {
 571     //BOOLEAN
 572     //handle the case of unicode.
 573     //when a unicode appears then we must use a buffer that holds char internal values
 574     //At the end of this method currentCharacter holds the new visited char
 575     //and currentPosition points right next after it
 576     //Both previous lines are true if the currentCharacter is a digit base on radix
 577     //On false, no side effect has occured.
 578
 579     //ALL getNextChar.... ARE OPTIMIZED COPIES
 580
 581     int temp = currentPosition;
 582     try {
 583       currentCharacter = source[currentPosition++];
 584       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 585       //        && (source[currentPosition] == 'u')) {
 586       //        //-------------unicode traitement ------------
 587       //        int c1, c2, c3, c4;
 588       //        int unicodeSize = 6;
 589       //        currentPosition++;
 590       //        while (source[currentPosition] == 'u') {
 591       //          currentPosition++;
 592       //          unicodeSize++;
 593       //        }
 594       //
 595       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 596       //          || c1 < 0)
 597       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 598       //            || c2 < 0)
 599       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 600       //            || c3 < 0)
 601       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 602       //            || c4 < 0)) {
 603       //          currentPosition = temp;
 604       //          return false;
 605       //        }
 606       //
 607       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 608       //        if (Character.digit(currentCharacter, radix) == -1) {
 609       //          currentPosition = temp;
 610       //          return false;
 611       //        }
 612       //
 613       //        //need the unicode buffer
 614       //        if (withoutUnicodePtr == 0) {
 615       //          //buffer all the entries that have been left aside....
 616       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 617       //          System.arraycopy(
 618       //            source,
 619       //            startPosition,
 620       //            withoutUnicodeBuffer,
 621       //            1,
 622       //            withoutUnicodePtr);
 623       //        }
 624       //        //fill the buffer with the char
 625       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 626       //        return true;
 627       //      } //-------------end unicode traitement--------------
 628       //      else {
 629       if (Character.digit(currentCharacter, radix) == -1) {
 630         currentPosition = temp;
 631         return false;
 632       }
 633       //        if (withoutUnicodePtr != 0)
 634       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 635       return true;
 636       //      }
 637     } catch (IndexOutOfBoundsException e) {
 638       currentPosition = temp;
 639       return false;
 640     }
 641   }
 642   public boolean getNextCharAsJavaIdentifierPart() {
 643     //BOOLEAN
 644     //handle the case of unicode.
 645     //when a unicode appears then we must use a buffer that holds char internal values
 646     //At the end of this method currentCharacter holds the new visited char
 647     //and currentPosition points right next after it
 648     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 649     //On false, no side effect has occured.
 650
 651     //ALL getNextChar.... ARE OPTIMIZED COPIES
 652
 653     int temp = currentPosition;
 654     try {
 655       currentCharacter = source[currentPosition++];
 656       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 657       //        && (source[currentPosition] == 'u')) {
 658       //        //-------------unicode traitement ------------
 659       //        int c1, c2, c3, c4;
 660       //        int unicodeSize = 6;
 661       //        currentPosition++;
 662       //        while (source[currentPosition] == 'u') {
 663       //          currentPosition++;
 664       //          unicodeSize++;
 665       //        }
 666       //
 667       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 668       //          || c1 < 0)
 669       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 670       //            || c2 < 0)
 671       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 672       //            || c3 < 0)
 673       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 674       //            || c4 < 0)) {
 675       //          currentPosition = temp;
 676       //          return false;
 677       //        }
 678       //
 679       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 680       //        if (!isPHPIdentifierPart(currentCharacter)) {
 681       //          currentPosition = temp;
 682       //          return false;
 683       //        }
 684       //
 685       //        //need the unicode buffer
 686       //        if (withoutUnicodePtr == 0) {
 687       //          //buffer all the entries that have been left aside....
 688       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 689       //          System.arraycopy(
 690       //            source,
 691       //            startPosition,
 692       //            withoutUnicodeBuffer,
 693       //            1,
 694       //            withoutUnicodePtr);
 695       //        }
 696       //        //fill the buffer with the char
 697       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 698       //        return true;
 699       //      } //-------------end unicode traitement--------------
 700       //      else {
 701       if (!isPHPIdentifierPart(currentCharacter)) {
 702         currentPosition = temp;
 703         return false;
 704       }
 705
 706       //        if (withoutUnicodePtr != 0)
 707       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 708       return true;
 709       //      }
 710     } catch (IndexOutOfBoundsException e) {
 711       currentPosition = temp;
 712       return false;
 713     }
 714   }
 715
 716   public int getNextToken() throws InvalidInputException {
 717     int htmlPosition = currentPosition;
 718     try {
 719       while (!phpMode) {
 720         currentCharacter = source[currentPosition++];
 721         if (currentCharacter == '<') {
 722           if (getNextChar('?')) {
 723             currentCharacter = source[currentPosition++];
 724             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
 725               // <?
 726               startPosition = currentPosition;
 727               phpMode = true;
 728               if (tokenizeWhiteSpace) {
 729                 // && (whiteStart != currentPosition - 1)) {
 730                 // reposition scanner in case we are interested by spaces as tokens
 731                 startPosition = htmlPosition;
 732                 return TokenNameHTML;
 733               }
 734             } else {
 735               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
 736               if (phpStart) {
 737                 int test = getNextChar('H', 'h');
 738                 if (test >= 0) {
 739                   test = getNextChar('P', 'p');
 740                   if (test >= 0) {
 741                     // <?PHP  <?php
 742                     startPosition = currentPosition;
 743                     phpMode = true;
 744
 745                     if (tokenizeWhiteSpace) {
 746                       // && (whiteStart != currentPosition - 1)) {
 747                       // reposition scanner in case we are interested in spaces as tokens
 748                       startPosition = htmlPosition;
 749                       return TokenNameHTML;
 750                     }
 751                   }
 752                 }
 753               }
 754             }
 755           }
 756         }
 757
 758         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 759           if (recordLineSeparator) {
 760             pushLineSeparator();
 761           } else {
 762             currentLine = null;
 763           }
 764         }
 765       }
 766     } //-----------------end switch while try--------------------
 767     catch (IndexOutOfBoundsException e) {
 768       if (tokenizeWhiteSpace) {
 769         // && (whiteStart != currentPosition - 1)) {
 770         // reposition scanner in case we are interested in spaces as tokens
 771         startPosition = htmlPosition;
 772       }
 773       return TokenNameEOF;
 774     }
 775
 776     if (phpMode) {
 777       this.wasAcr = false;
 778       if (diet) {
 779         jumpOverMethodBody();
 780         diet = false;
 781         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 782       }
 783       try {
 784         while (true) { //loop for jumping over comments
 785           withoutUnicodePtr = 0;
 786           //start with a new token (even comment written with unicode )
 787
 788           // ---------Consume white space and handles startPosition---------
 789           int whiteStart = currentPosition;
 790           boolean isWhiteSpace;
 791           do {
 792             startPosition = currentPosition;
 793             currentCharacter = source[currentPosition++];
 794             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 795             //              && (source[currentPosition] == 'u')) {
 796             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 797             //            } else {
 798             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 799               checkNonExternalizeString();
 800               if (recordLineSeparator) {
 801                 pushLineSeparator();
 802               } else {
 803                 currentLine = null;
 804               }
 805             }
 806             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 807             //            }
 808           } while (isWhiteSpace);
 809           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 810             // reposition scanner in case we are interested in spaces as tokens
 811             currentPosition--;
 812             startPosition = whiteStart;
 813             return TokenNameWHITESPACE;
 814           }
 815           //little trick to get out in the middle of a source computation
 816           if (currentPosition > eofPosition)
 817             return TokenNameEOF;
 818
 819           // ---------Identify the next token-------------
 820
 821           switch (currentCharacter) {
 822             case '(' :
 823               return TokenNameLPAREN;
 824             case ')' :
 825               return TokenNameRPAREN;
 826             case '{' :
 827               return TokenNameLBRACE;
 828             case '}' :
 829               return TokenNameRBRACE;
 830             case '[' :
 831               return TokenNameLBRACKET;
 832             case ']' :
 833               return TokenNameRBRACKET;
 834             case ';' :
 835               return TokenNameSEMICOLON;
 836             case ',' :
 837               return TokenNameCOMMA;
 838
 839             case '.' :
 840               if (getNextCharAsDigit())
 841                 return scanNumber(true);
 842               return TokenNameDOT;
 843             case '+' :
 844               {
 845                 int test;
 846                 if ((test = getNextChar('+', '=')) == 0)
 847                   return TokenNamePLUS_PLUS;
 848                 if (test > 0)
 849                   return TokenNamePLUS_EQUAL;
 850                 return TokenNamePLUS;
 851               }
 852             case '-' :
 853               {
 854                 int test;
 855                 if ((test = getNextChar('-', '=')) == 0)
 856                   return TokenNameMINUS_MINUS;
 857                 if (test > 0)
 858                   return TokenNameMINUS_EQUAL;
 859                 if (getNextChar('>'))
 860                   return TokenNameMINUS_GREATER;
 861
 862                 return TokenNameMINUS;
 863               }
 864             case '~' :
 865               if (getNextChar('='))
 866                 return TokenNameTWIDDLE_EQUAL;
 867               return TokenNameTWIDDLE;
 868             case '!' :
 869               if (getNextChar('=')) {
 870                 if (getNextChar('=')) {
 871                   return TokenNameNOT_EQUAL_EQUAL;
 872                 }
 873                 return TokenNameNOT_EQUAL;
 874               }
 875               return TokenNameNOT;
 876             case '*' :
 877               if (getNextChar('='))
 878                 return TokenNameMULTIPLY_EQUAL;
 879               return TokenNameMULTIPLY;
 880             case '%' :
 881               if (getNextChar('='))
 882                 return TokenNameREMAINDER_EQUAL;
 883               return TokenNameREMAINDER;
 884             case '<' :
 885               {
 886                 int test;
 887                 if ((test = getNextChar('=', '<')) == 0)
 888                   return TokenNameLESS_EQUAL;
 889                 if (test > 0) {
 890                   if (getNextChar('='))
 891                     return TokenNameLEFT_SHIFT_EQUAL;
 892                   if (getNextChar('<')) {
 893                     int heredocStart = currentPosition;
 894                     int heredocLength = 0;
 895                     currentCharacter = source[currentPosition++];
 896                     if (isPHPIdentifierStart(currentCharacter)) {
 897                       currentCharacter = source[currentPosition++];
 898                     } else {
 899                       return TokenNameERROR;
 900                     }
 901                     while (isPHPIdentifierPart(currentCharacter)) {
 902                       currentCharacter = source[currentPosition++];
 903                     }
 904
 905                     heredocLength = currentPosition - heredocStart - 1;
 906
 907                     // heredoc end-tag determination
 908                     boolean endTag = true;
 909                     char ch;
 910                     do {
 911                       ch = source[currentPosition++];
 912                       if (ch == '\r' || ch == '\n') {
 913                         if (recordLineSeparator) {
 914                           pushLineSeparator();
 915                         } else {
 916                           currentLine = null;
 917                         }
 918                         for (int i = 0; i < heredocLength; i++) {
 919                           if (source[currentPosition + i] != source[heredocStart + i]) {
 920                             endTag = false;
 921                             break;
 922                           }
 923                         }
 924                         if (endTag) {
 925                           currentPosition += heredocLength - 1;
 926                           currentCharacter = source[currentPosition++];
 927                           break; // do...while loop
 928                         } else {
 929                           endTag = true;
 930                         }
 931                       }
 932
 933                     } while (true);
 934
 935                     return TokenNameHEREDOC;
 936                   }
 937                   return TokenNameLEFT_SHIFT;
 938                 }
 939                 return TokenNameLESS;
 940               }
 941             case '>' :
 942               {
 943                 int test;
 944                 if ((test = getNextChar('=', '>')) == 0)
 945                   return TokenNameGREATER_EQUAL;
 946                 if (test > 0) {
 947                   if ((test = getNextChar('=', '>')) == 0)
 948                     return TokenNameRIGHT_SHIFT_EQUAL;
 949                   return TokenNameRIGHT_SHIFT;
 950                 }
 951                 return TokenNameGREATER;
 952               }
 953             case '=' :
 954               if (getNextChar('=')) {
 955                 if (getNextChar('=')) {
 956                   return TokenNameEQUAL_EQUAL_EQUAL;
 957                 }
 958                 return TokenNameEQUAL_EQUAL;
 959               }
 960               if (getNextChar('>'))
 961                 return TokenNameEQUAL_GREATER;
 962               return TokenNameEQUAL;
 963             case '&' :
 964               {
 965                 int test;
 966                 if ((test = getNextChar('&', '=')) == 0)
 967                   return TokenNameAND_AND;
 968                 if (test > 0)
 969                   return TokenNameAND_EQUAL;
 970                 return TokenNameAND;
 971               }
 972             case '|' :
 973               {
 974                 int test;
 975                 if ((test = getNextChar('|', '=')) == 0)
 976                   return TokenNameOR_OR;
 977                 if (test > 0)
 978                   return TokenNameOR_EQUAL;
 979                 return TokenNameOR;
 980               }
 981             case '^' :
 982               if (getNextChar('='))
 983                 return TokenNameXOR_EQUAL;
 984               return TokenNameXOR;
 985             case '?' :
 986               if (getNextChar('>')) {
 987                 phpMode = false;
 988                 return TokenNameStopPHP;
 989               }
 990               return TokenNameQUESTION;
 991             case ':' :
 992               if (getNextChar(':'))
 993                 return TokenNameCOLON_COLON;
 994               return TokenNameCOLON;
 995             case '@' :
 996               return TokenNameAT;
 997               //                                        case '\'' :
 998               //                                                {
 999               //                                                        int test;
1000               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1001               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1002               //                                                        }
1003               //                                                        if (test > 0) {
1004               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1005               //                                                                for (int lookAhead = 0;
1006               //                                                                        lookAhead < 3;
1007               //                                                                        lookAhead++) {
1008               //                                                                        if (currentPosition + lookAhead
1009               //                                                                                == source.length)
1010               //                                                                                break;
1011               //                                                                        if (source[currentPosition + lookAhead]
1012               //                                                                                == '\n')
1013               //                                                                                break;
1014               //                                                                        if (source[currentPosition + lookAhead]
1015               //                                                                                == '\'') {
1016               //                                                                                currentPosition += lookAhead + 1;
1017               //                                                                                break;
1018               //                                                                        }
1019               //                                                                }
1020               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1021               //                                                        }
1022               //                                                }
1023               //                                                if (getNextChar('\'')) {
1024               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1025               //                                                        for (int lookAhead = 0;
1026               //                                                                lookAhead < 3;
1027               //                                                                lookAhead++) {
1028               //                                                                if (currentPosition + lookAhead
1029               //                                                                        == source.length)
1030               //                                                                        break;
1031               //                                                                if (source[currentPosition + lookAhead]
1032               //                                                                        == '\n')
1033               //                                                                        break;
1034               //                                                                if (source[currentPosition + lookAhead]
1035               //                                                                        == '\'') {
1036               //                                                                        currentPosition += lookAhead + 1;
1037               //                                                                        break;
1038               //                                                                }
1039               //                                                        }
1040               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1041               //                                                }
1042               //                                                if (getNextChar('\\'))
1043               //                                                        scanEscapeCharacter();
1044               //                                                else { // consume next character
1045               //                                                        unicodeAsBackSlash = false;
1046               //                                                        if (((currentCharacter = source[currentPosition++])
1047               //                                                                == '\\')
1048               //                                                                && (source[currentPosition] == 'u')) {
1049               //                                                                getNextUnicodeChar();
1050               //                                                        } else {
1051               //                                                                if (withoutUnicodePtr != 0) {
1052               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1053               //                                                                                currentCharacter;
1054               //                                                                }
1055               //                                                        }
1056               //                                                }
1057               //                                                //            if (getNextChar('\''))
1058               //                                                //              return TokenNameCharacterLiteral;
1059               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1060               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1061               //                                                        if (currentPosition + lookAhead == source.length)
1062               //                                                                break;
1063               //                                                        if (source[currentPosition + lookAhead] == '\n')
1064               //                                                                break;
1065               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1066               //                                                                currentPosition += lookAhead + 1;
1067               //                                                                break;
1068               //                                                        }
1069               //                                                }
1070               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1071             case '\'' :
1072               try {
1073                 // consume next character
1074                 unicodeAsBackSlash = false;
1075                 currentCharacter = source[currentPosition++];
1076                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1077                 //                  && (source[currentPosition] == 'u')) {
1078                 //                  getNextUnicodeChar();
1079                 //                } else {
1080                 //                  if (withoutUnicodePtr != 0) {
1081                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1082                 //                      currentCharacter;
1083                 //                  }
1084                 //                }
1085
1086                 while (currentCharacter != '\'') {
1087
1088                   /**** in PHP \r and \n are valid in string literals ****/
1089                   //                  if ((currentCharacter == '\n')
1090                   //                    || (currentCharacter == '\r')) {
1091                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1092                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1093                   //                      if (currentPosition + lookAhead == source.length)
1094                   //                        break;
1095                   //                      if (source[currentPosition + lookAhead] == '\n')
1096                   //                        break;
1097                   //                      if (source[currentPosition + lookAhead] == '\"') {
1098                   //                        currentPosition += lookAhead + 1;
1099                   //                        break;
1100                   //                      }
1101                   //                    }
1102                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1103                   //                  }
1104                   if (currentCharacter == '\\') {
1105                     int escapeSize = currentPosition;
1106                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1107                     //scanSingleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1108                     scanSingleQuotedEscapeCharacter();
1109                     escapeSize = currentPosition - escapeSize;
1110                     if (withoutUnicodePtr == 0) {
1111                       //buffer all the entries that have been left aside....
1112                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1113                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1114                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1115                     } else { //overwrite the / in the buffer
1116                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1117                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1118                         withoutUnicodePtr--;
1119                       }
1120                     }
1121                   }
1122                   // consume next character
1123                   unicodeAsBackSlash = false;
1124                   currentCharacter = source[currentPosition++];
1125                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1126                   //                    && (source[currentPosition] == 'u')) {
1127                   //                    getNextUnicodeChar();
1128                   //                  } else {
1129                   if (withoutUnicodePtr != 0) {
1130                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1131                   }
1132                   //                  }
1133
1134                 }
1135               } catch (IndexOutOfBoundsException e) {
1136                 throw new InvalidInputException(UNTERMINATED_STRING);
1137               } catch (InvalidInputException e) {
1138                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1139                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1140                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1141                     if (currentPosition + lookAhead == source.length)
1142                       break;
1143                     if (source[currentPosition + lookAhead] == '\n')
1144                       break;
1145                     if (source[currentPosition + lookAhead] == '\'') {
1146                       currentPosition += lookAhead + 1;
1147                       break;
1148                     }
1149                   }
1150
1151                 }
1152                 throw e; // rethrow
1153               }
1154               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1155                 if (currentLine == null) {
1156                   currentLine = new NLSLine();
1157                   lines.add(currentLine);
1158                 }
1159                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1160               }
1161               return TokenNameStringConstant;
1162             case '"' :
1163               try {
1164                 // consume next character
1165                 unicodeAsBackSlash = false;
1166                 currentCharacter = source[currentPosition++];
1167                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1168                 //                  && (source[currentPosition] == 'u')) {
1169                 //                  getNextUnicodeChar();
1170                 //                } else {
1171                 //                  if (withoutUnicodePtr != 0) {
1172                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1173                 //                      currentCharacter;
1174                 //                  }
1175                 //                }
1176
1177                 while (currentCharacter != '"') {
1178
1179                   /**** in PHP \r and \n are valid in string literals ****/
1180                   //                  if ((currentCharacter == '\n')
1181                   //                    || (currentCharacter == '\r')) {
1182                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1183                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1184                   //                      if (currentPosition + lookAhead == source.length)
1185                   //                        break;
1186                   //                      if (source[currentPosition + lookAhead] == '\n')
1187                   //                        break;
1188                   //                      if (source[currentPosition + lookAhead] == '\"') {
1189                   //                        currentPosition += lookAhead + 1;
1190                   //                        break;
1191                   //                      }
1192                   //                    }
1193                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1194                   //                  }
1195                   if (currentCharacter == '\\') {
1196                     int escapeSize = currentPosition;
1197                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1198                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1199                     scanDoubleQuotedEscapeCharacter();
1200                     escapeSize = currentPosition - escapeSize;
1201                     if (withoutUnicodePtr == 0) {
1202                       //buffer all the entries that have been left aside....
1203                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1204                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1205                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1206                     } else { //overwrite the / in the buffer
1207                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1208                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1209                         withoutUnicodePtr--;
1210                       }
1211                     }
1212                   }
1213                   // consume next character
1214                   unicodeAsBackSlash = false;
1215                   currentCharacter = source[currentPosition++];
1216                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1217                   //                    && (source[currentPosition] == 'u')) {
1218                   //                    getNextUnicodeChar();
1219                   //                  } else {
1220                   if (withoutUnicodePtr != 0) {
1221                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1222                   }
1223                   //                  }
1224
1225                 }
1226               } catch (IndexOutOfBoundsException e) {
1227                 throw new InvalidInputException(UNTERMINATED_STRING);
1228               } catch (InvalidInputException e) {
1229                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1230                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1231                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1232                     if (currentPosition + lookAhead == source.length)
1233                       break;
1234                     if (source[currentPosition + lookAhead] == '\n')
1235                       break;
1236                     if (source[currentPosition + lookAhead] == '\"') {
1237                       currentPosition += lookAhead + 1;
1238                       break;
1239                     }
1240                   }
1241
1242                 }
1243                 throw e; // rethrow
1244               }
1245               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1246                 if (currentLine == null) {
1247                   currentLine = new NLSLine();
1248                   lines.add(currentLine);
1249                 }
1250                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1251               }
1252               return TokenNameStringLiteral;
1253             case '`' :
1254               try {
1255                 // consume next character
1256                 unicodeAsBackSlash = false;
1257                 currentCharacter = source[currentPosition++];
1258                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1259                 //                  && (source[currentPosition] == 'u')) {
1260                 //                  getNextUnicodeChar();
1261                 //                } else {
1262                 //                  if (withoutUnicodePtr != 0) {
1263                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1264                 //                      currentCharacter;
1265                 //                  }
1266                 //                }
1267
1268                 while (currentCharacter != '`') {
1269
1270                   /**** in PHP \r and \n are valid in string literals ****/
1271                   //                if ((currentCharacter == '\n')
1272                   //                  || (currentCharacter == '\r')) {
1273                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1274                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1275                   //                    if (currentPosition + lookAhead == source.length)
1276                   //                      break;
1277                   //                    if (source[currentPosition + lookAhead] == '\n')
1278                   //                      break;
1279                   //                    if (source[currentPosition + lookAhead] == '\"') {
1280                   //                      currentPosition += lookAhead + 1;
1281                   //                      break;
1282                   //                    }
1283                   //                  }
1284                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1285                   //                }
1286                   if (currentCharacter == '\\') {
1287                     int escapeSize = currentPosition;
1288                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1289                     //the escape scan below has a side effect on this value, and we need the previous value a few lines further down
1290                     scanDoubleQuotedEscapeCharacter();
1291                     escapeSize = currentPosition - escapeSize;
1292                     if (withoutUnicodePtr == 0) {
1293                       //buffer all the entries that have been left aside....
1294                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1295                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1296                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1297                     } else { //overwrite the / in the buffer
1298                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1299                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1300                         withoutUnicodePtr--;
1301                       }
1302                     }
1303                   }
1304                   // consume next character
1305                   unicodeAsBackSlash = false;
1306                   currentCharacter = source[currentPosition++];
1307                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1308                   //                    && (source[currentPosition] == 'u')) {
1309                   //                    getNextUnicodeChar();
1310                   //                  } else {
1311                   if (withoutUnicodePtr != 0) {
1312                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1313                   }
1314                   //                  }
1315
1316                 }
1317               } catch (IndexOutOfBoundsException e) {
1318                 throw new InvalidInputException(UNTERMINATED_STRING);
1319               } catch (InvalidInputException e) {
1320                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1321                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1322                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1323                     if (currentPosition + lookAhead == source.length)
1324                       break;
1325                     if (source[currentPosition + lookAhead] == '\n')
1326                       break;
1327                     if (source[currentPosition + lookAhead] == '`') {
1328                       currentPosition += lookAhead + 1;
1329                       break;
1330                     }
1331                   }
1332
1333                 }
1334                 throw e; // rethrow
1335               }
1336               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1337                 if (currentLine == null) {
1338                   currentLine = new NLSLine();
1339                   lines.add(currentLine);
1340                 }
1341                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1342               }
1343               return TokenNameStringInterpolated;
1344             case '#' :
1345             case '/' :
1346               {
1347                 int test;
1348                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1349                   //line comment
1350                   int endPositionForLineComment = 0;
1351                   try { //get the next char
1352                     currentCharacter = source[currentPosition++];
1353                     //                    if (((currentCharacter = source[currentPosition++])
1354                     //                      == '\\')
1355                     //                      && (source[currentPosition] == 'u')) {
1356                     //                      //-------------unicode traitement ------------
1357                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1358                     //                      currentPosition++;
1359                     //                      while (source[currentPosition] == 'u') {
1360                     //                        currentPosition++;
1361                     //                      }
1362                     //                      if ((c1 =
1363                     //                        Character.getNumericValue(source[currentPosition++]))
1364                     //                        > 15
1365                     //                        || c1 < 0
1366                     //                        || (c2 =
1367                     //                          Character.getNumericValue(source[currentPosition++]))
1368                     //                          > 15
1369                     //                        || c2 < 0
1370                     //                        || (c3 =
1371                     //                          Character.getNumericValue(source[currentPosition++]))
1372                     //                          > 15
1373                     //                        || c3 < 0
1374                     //                        || (c4 =
1375                     //                          Character.getNumericValue(source[currentPosition++]))
1376                     //                          > 15
1377                     //                        || c4 < 0) {
1378                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1379                     //                      } else {
1380                     //                        currentCharacter =
1381                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1382                     //                      }
1383                     //                    }
1384
1385                     //handle the \\u case manually into comment
1386                     //                    if (currentCharacter == '\\') {
1387                     //                      if (source[currentPosition] == '\\')
1388                     //                        currentPosition++;
1389                     //                    } //jump over the \\
1390                     boolean isUnicode = false;
1391                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1392                       if (currentCharacter == '?') {
1393                         if (getNextChar('>')) {
1394                           startPosition = currentPosition - 2;
1395                           phpMode = false;
1396                           return TokenNameStopPHP;
1397                         }
1398                       }
1399
1400                       //get the next char
1401                       isUnicode = false;
1402                       currentCharacter = source[currentPosition++];
1403                       //                      if (((currentCharacter = source[currentPosition++])
1404                       //                        == '\\')
1405                       //                        && (source[currentPosition] == 'u')) {
1406                       //                        isUnicode = true;
1407                       //                        //-------------unicode traitement ------------
1408                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409                       //                        currentPosition++;
1410                       //                        while (source[currentPosition] == 'u') {
1411                       //                          currentPosition++;
1412                       //                        }
1413                       //                        if ((c1 =
1414                       //                          Character.getNumericValue(source[currentPosition++]))
1415                       //                          > 15
1416                       //                          || c1 < 0
1417                       //                          || (c2 =
1418                       //                            Character.getNumericValue(
1419                       //                              source[currentPosition++]))
1420                       //                            > 15
1421                       //                          || c2 < 0
1422                       //                          || (c3 =
1423                       //                            Character.getNumericValue(
1424                       //                              source[currentPosition++]))
1425                       //                            > 15
1426                       //                          || c3 < 0
1427                       //                          || (c4 =
1428                       //                            Character.getNumericValue(
1429                       //                              source[currentPosition++]))
1430                       //                            > 15
1431                       //                          || c4 < 0) {
1432                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1433                       //                        } else {
1434                       //                          currentCharacter =
1435                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1436                       //                        }
1437                       //                      }
1438                       //handle the \\u case manually into comment
1439                       //                      if (currentCharacter == '\\') {
1440                       //                        if (source[currentPosition] == '\\')
1441                       //                          currentPosition++;
1442                       //                      } //jump over the \\
1443                     }
1444                     if (isUnicode) {
1445                       endPositionForLineComment = currentPosition - 6;
1446                     } else {
1447                       endPositionForLineComment = currentPosition - 1;
1448                     }
1449                     recordComment(false);
1450                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1451                       checkNonExternalizeString();
1452                       if (recordLineSeparator) {
1453                         if (isUnicode) {
1454                           pushUnicodeLineSeparator();
1455                         } else {
1456                           pushLineSeparator();
1457                         }
1458                       } else {
1459                         currentLine = null;
1460                       }
1461                     }
1462                     if (tokenizeComments) {
1463                       if (!isUnicode) {
1464                         currentPosition = endPositionForLineComment;
1465                         // reset one character behind
1466                       }
1467                       return TokenNameCOMMENT_LINE;
1468                     }
1469                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1470                     if (tokenizeComments) {
1471                       currentPosition--;
1472                       // reset one character behind
1473                       return TokenNameCOMMENT_LINE;
1474                     }
1475                   }
1476                   break;
1477                 }
1478                 if (test > 0) {
1479                   //traditional and annotation comment
1480                   boolean isJavadoc = false, star = false;
1481                   // consume next character
1482                   unicodeAsBackSlash = false;
1483                   currentCharacter = source[currentPosition++];
1484                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1485                   //                    && (source[currentPosition] == 'u')) {
1486                   //                    getNextUnicodeChar();
1487                   //                  } else {
1488                   //                    if (withoutUnicodePtr != 0) {
1489                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1490                   //                        currentCharacter;
1491                   //                    }
1492                   //                  }
1493
1494                   if (currentCharacter == '*') {
1495                     isJavadoc = true;
1496                     star = true;
1497                   }
1498                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1499                     checkNonExternalizeString();
1500                     if (recordLineSeparator) {
1501                       pushLineSeparator();
1502                     } else {
1503                       currentLine = null;
1504                     }
1505                   }
1506                   try { //get the next char
1507                     currentCharacter = source[currentPosition++];
1508                     //                    if (((currentCharacter = source[currentPosition++])
1509                     //                      == '\\')
1510                     //                      && (source[currentPosition] == 'u')) {
1511                     //                      //-------------unicode traitement ------------
1512                     //                      getNextUnicodeChar();
1513                     //                    }
1514                     //handle the \\u case manually into comment
1515                     //                    if (currentCharacter == '\\') {
1516                     //                      if (source[currentPosition] == '\\')
1517                     //                        currentPosition++;
1518                     //                      //jump over the \\
1519                     //                    }
1520                     // empty comment is not a javadoc /**/
1521                     if (currentCharacter == '/') {
1522                       isJavadoc = false;
1523                     }
1524                     //loop until end of comment */
1525                     while ((currentCharacter != '/') || (!star)) {
1526                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1527                         checkNonExternalizeString();
1528                         if (recordLineSeparator) {
1529                           pushLineSeparator();
1530                         } else {
1531                           currentLine = null;
1532                         }
1533                       }
1534                       star = currentCharacter == '*';
1535                       //get next char
1536                       currentCharacter = source[currentPosition++];
1537                       //                      if (((currentCharacter = source[currentPosition++])
1538                       //                        == '\\')
1539                       //                        && (source[currentPosition] == 'u')) {
1540                       //                        //-------------unicode traitement ------------
1541                       //                        getNextUnicodeChar();
1542                       //                      }
1543                       //handle the \\u case manually into comment
1544                       //                      if (currentCharacter == '\\') {
1545                       //                        if (source[currentPosition] == '\\')
1546                       //                          currentPosition++;
1547                       //                      } //jump over the \\
1548                     }
1549                     recordComment(isJavadoc);
1550                     if (tokenizeComments) {
1551                       if (isJavadoc)
1552                         return TokenNameCOMMENT_PHPDOC;
1553                       return TokenNameCOMMENT_BLOCK;
1554                     }
1555                   } catch (IndexOutOfBoundsException e) {
1556                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1557                   }
1558                   break;
1559                 }
1560                 if (getNextChar('='))
1561                   return TokenNameDIVIDE_EQUAL;
1562                 return TokenNameDIVIDE;
1563               }
1564             case '\u001a' :
1565               if (atEnd())
1566                 return TokenNameEOF;
1567               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1568               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1569
1570             default :
1571               if (currentCharacter == '$') {
1572                 while ((currentCharacter = source[currentPosition++]) == '$') {
1573                 }
1574                 if (currentCharacter == '{')
1575                   return TokenNameDOLLAR_LBRACE;
1576                 if (isPHPIdentifierStart(currentCharacter))
1577                   return scanIdentifierOrKeyword(true);
1578                 return TokenNameERROR;
1579               }
1580               if (isPHPIdentifierStart(currentCharacter))
1581                 return scanIdentifierOrKeyword(false);
1582               if (Character.isDigit(currentCharacter))
1583                 return scanNumber(false);
1584               return TokenNameERROR;
1585           }
1586         }
1587       } //-----------------end switch while try--------------------
1588       catch (IndexOutOfBoundsException e) {
1589       }
1590     }
1591     return TokenNameEOF;
1592   }
1593
1594   //  public final void getNextUnicodeChar()
1595   //    throws IndexOutOfBoundsException, InvalidInputException {
1596   //    //VOID
1597   //    //handle the case of a unicode escape.
1598   //    //when a unicode escape appears we must use a buffer that holds the decoded char values
1599   //    //At the end of this method currentCharacter holds the newly visited char
1600   //    //and currentPosition points right after it
1601   //
1602   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1603   //
1604   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1605   //    currentPosition++;
1606   //    while (source[currentPosition] == 'u') {
1607   //      currentPosition++;
1608   //      unicodeSize++;
1609   //    }
1610   //
1611   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1612   //      || c1 < 0
1613   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1614   //      || c2 < 0
1615   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1616   //      || c3 < 0
1617   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1618   //      || c4 < 0) {
1619   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1620   //    } else {
1621   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1622   //      //need the unicode buffer
1623   //      if (withoutUnicodePtr == 0) {
1624   //        //buffer all the entries that have been left aside....
1625   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1626   //        System.arraycopy(
1627   //          source,
1628   //          startPosition,
1629   //          withoutUnicodeBuffer,
1630   //          1,
1631   //          withoutUnicodePtr);
1632   //      }
1633   //      //fill the buffer with the char
1634   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1635   //    }
1636   //    unicodeAsBackSlash = currentCharacter == '\\';
1637   //  }
1638   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1639    */
1640   public final void jumpOverMethodBody() {
1641
1642     this.wasAcr = false;
1643     int found = 1;
1644     try {
1645       while (true) { //loop for jumping over comments
1646         // ---------Consume white space and handles startPosition---------
1647         boolean isWhiteSpace;
1648         do {
1649           startPosition = currentPosition;
1650           currentCharacter = source[currentPosition++];
1651           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1652           //            && (source[currentPosition] == 'u')) {
1653           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1654           //          } else {
1655           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1656             pushLineSeparator();
1657           isWhiteSpace = Character.isWhitespace(currentCharacter);
1658           //          }
1659         } while (isWhiteSpace);
1660
1661         // -------consume token until } is found---------
1662         switch (currentCharacter) {
1663           case '{' :
1664             found++;
1665             break;
1666           case '}' :
1667             found--;
1668             if (found == 0)
1669               return;
1670             break;
1671           case '\'' :
1672             {
1673               boolean test;
1674               test = getNextChar('\\');
1675               if (test) {
1676                 try {
1677                   scanDoubleQuotedEscapeCharacter();
1678                 } catch (InvalidInputException ex) {
1679                 };
1680               } else {
1681                 //                try { // consume next character
1682                 unicodeAsBackSlash = false;
1683                 currentCharacter = source[currentPosition++];
1684                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1685                 //                    && (source[currentPosition] == 'u')) {
1686                 //                    getNextUnicodeChar();
1687                 //                  } else {
1688                 if (withoutUnicodePtr != 0) {
1689                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1690                 }
1691                 //                  }
1692                 //                } catch (InvalidInputException ex) {
1693                 //                };
1694               }
1695               getNextChar('\'');
1696               break;
1697             }
1698           case '"' :
1699             try {
1700               //              try { // consume next character
1701               unicodeAsBackSlash = false;
1702               currentCharacter = source[currentPosition++];
1703               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1704               //                  && (source[currentPosition] == 'u')) {
1705               //                  getNextUnicodeChar();
1706               //                } else {
1707               if (withoutUnicodePtr != 0) {
1708                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1709               }
1710               //                }
1711               //              } catch (InvalidInputException ex) {
1712               //              };
1713               while (currentCharacter != '"') {
1714                 if (currentCharacter == '\r') {
1715                   if (source[currentPosition] == '\n')
1716                     currentPosition++;
1717                   break;
1718                   // the string cannot go further than the line
1719                 }
1720                 if (currentCharacter == '\n') {
1721                   break;
1722                   // the string cannot go further than the line
1723                 }
1724                 if (currentCharacter == '\\') {
1725                   try {
1726                     scanDoubleQuotedEscapeCharacter();
1727                   } catch (InvalidInputException ex) {
1728                   };
1729                 }
1730                 //                try { // consume next character
1731                 unicodeAsBackSlash = false;
1732                 currentCharacter = source[currentPosition++];
1733                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1734                 //                    && (source[currentPosition] == 'u')) {
1735                 //                    getNextUnicodeChar();
1736                 //                  } else {
1737                 if (withoutUnicodePtr != 0) {
1738                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1739                 }
1740                 //                  }
1741                 //                } catch (InvalidInputException ex) {
1742                 //                };
1743               }
1744             } catch (IndexOutOfBoundsException e) {
1745               return;
1746             }
1747             break;
1748           case '/' :
1749             {
1750               int test;
1751               if ((test = getNextChar('/', '*')) == 0) {
1752                 //line comment
1753                 try {
1754                   //get the next char
1755                   currentCharacter = source[currentPosition++];
1756                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1757                   //                    && (source[currentPosition] == 'u')) {
1758                   //                    //-------------unicode traitement ------------
1759                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1760                   //                    currentPosition++;
1761                   //                    while (source[currentPosition] == 'u') {
1762                   //                      currentPosition++;
1763                   //                    }
1764                   //                    if ((c1 =
1765                   //                      Character.getNumericValue(source[currentPosition++]))
1766                   //                      > 15
1767                   //                      || c1 < 0
1768                   //                      || (c2 =
1769                   //                        Character.getNumericValue(source[currentPosition++]))
1770                   //                        > 15
1771                   //                      || c2 < 0
1772                   //                      || (c3 =
1773                   //                        Character.getNumericValue(source[currentPosition++]))
1774                   //                        > 15
1775                   //                      || c3 < 0
1776                   //                      || (c4 =
1777                   //                        Character.getNumericValue(source[currentPosition++]))
1778                   //                        > 15
1779                   //                      || c4 < 0) {
1780                   //                      //error don't care of the value
1781                   //                      currentCharacter = 'A';
1782                   //                    } //something different from \n and \r
1783                   //                    else {
1784                   //                      currentCharacter =
1785                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1786                   //                    }
1787                   //                  }
1788
1789                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1790                     //get the next char
1791                     currentCharacter = source[currentPosition++];
1792                     //                    if (((currentCharacter = source[currentPosition++])
1793                     //                      == '\\')
1794                     //                      && (source[currentPosition] == 'u')) {
1795                     //                      //-------------unicode traitement ------------
1796                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1797                     //                      currentPosition++;
1798                     //                      while (source[currentPosition] == 'u') {
1799                     //                        currentPosition++;
1800                     //                      }
1801                     //                      if ((c1 =
1802                     //                        Character.getNumericValue(source[currentPosition++]))
1803                     //                        > 15
1804                     //                        || c1 < 0
1805                     //                        || (c2 =
1806                     //                          Character.getNumericValue(source[currentPosition++]))
1807                     //                          > 15
1808                     //                        || c2 < 0
1809                     //                        || (c3 =
1810                     //                          Character.getNumericValue(source[currentPosition++]))
1811                     //                          > 15
1812                     //                        || c3 < 0
1813                     //                        || (c4 =
1814                     //                          Character.getNumericValue(source[currentPosition++]))
1815                     //                          > 15
1816                     //                        || c4 < 0) {
1817                     //                        //error don't care of the value
1818                     //                        currentCharacter = 'A';
1819                     //                      } //something different from \n and \r
1820                     //                      else {
1821                     //                        currentCharacter =
1822                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1823                     //                      }
1824                     //                    }
1825                   }
1826                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1827                     pushLineSeparator();
1828                 } catch (IndexOutOfBoundsException e) {
1829                 } //an eof will them be generated
1830                 break;
1831               }
1832               if (test > 0) {
1833                 //traditional and annotation comment
1834                 boolean star = false;
1835                 //                try { // consume next character
1836                 unicodeAsBackSlash = false;
1837                 currentCharacter = source[currentPosition++];
1838                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1839                 //                    && (source[currentPosition] == 'u')) {
1840                 //                    getNextUnicodeChar();
1841                 //                  } else {
1842                 if (withoutUnicodePtr != 0) {
1843                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1844                 }
1845                 //                  };
1846                 //                } catch (InvalidInputException ex) {
1847                 //                };
1848                 if (currentCharacter == '*') {
1849                   star = true;
1850                 }
1851                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1852                   pushLineSeparator();
1853                 try { //get the next char
1854                   currentCharacter = source[currentPosition++];
1855                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1856                   //                    && (source[currentPosition] == 'u')) {
1857                   //                    //-------------unicode traitement ------------
1858                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1859                   //                    currentPosition++;
1860                   //                    while (source[currentPosition] == 'u') {
1861                   //                      currentPosition++;
1862                   //                    }
1863                   //                    if ((c1 =
1864                   //                      Character.getNumericValue(source[currentPosition++]))
1865                   //                      > 15
1866                   //                      || c1 < 0
1867                   //                      || (c2 =
1868                   //                        Character.getNumericValue(source[currentPosition++]))
1869                   //                        > 15
1870                   //                      || c2 < 0
1871                   //                      || (c3 =
1872                   //                        Character.getNumericValue(source[currentPosition++]))
1873                   //                        > 15
1874                   //                      || c3 < 0
1875                   //                      || (c4 =
1876                   //                        Character.getNumericValue(source[currentPosition++]))
1877                   //                        > 15
1878                   //                      || c4 < 0) {
1879                   //                      //error don't care of the value
1880                   //                      currentCharacter = 'A';
1881                   //                    } //something different from * and /
1882                   //                    else {
1883                   //                      currentCharacter =
1884                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1885                   //                    }
1886                   //                  }
1887                   //loop until end of comment */
1888                   while ((currentCharacter != '/') || (!star)) {
1889                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1890                       pushLineSeparator();
1891                     star = currentCharacter == '*';
1892                     //get next char
1893                     currentCharacter = source[currentPosition++];
1894                     //                    if (((currentCharacter = source[currentPosition++])
1895                     //                      == '\\')
1896                     //                      && (source[currentPosition] == 'u')) {
1897                     //                      //-------------unicode traitement ------------
1898                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1899                     //                      currentPosition++;
1900                     //                      while (source[currentPosition] == 'u') {
1901                     //                        currentPosition++;
1902                     //                      }
1903                     //                      if ((c1 =
1904                     //                        Character.getNumericValue(source[currentPosition++]))
1905                     //                        > 15
1906                     //                        || c1 < 0
1907                     //                        || (c2 =
1908                     //                          Character.getNumericValue(source[currentPosition++]))
1909                     //                          > 15
1910                     //                        || c2 < 0
1911                     //                        || (c3 =
1912                     //                          Character.getNumericValue(source[currentPosition++]))
1913                     //                          > 15
1914                     //                        || c3 < 0
1915                     //                        || (c4 =
1916                     //                          Character.getNumericValue(source[currentPosition++]))
1917                     //                          > 15
1918                     //                        || c4 < 0) {
1919                     //                        //error don't care of the value
1920                     //                        currentCharacter = 'A';
1921                     //                      } //something different from * and /
1922                     //                      else {
1923                     //                        currentCharacter =
1924                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1925                     //                      }
1926                     //                    }
1927                   }
1928                 } catch (IndexOutOfBoundsException e) {
1929                   return;
1930                 }
1931                 break;
1932               }
1933               break;
1934             }
1935
1936           default :
1937             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1938               try {
1939                 scanIdentifierOrKeyword((currentCharacter == '$'));
1940               } catch (InvalidInputException ex) {
1941               };
1942               break;
1943             }
1944             if (Character.isDigit(currentCharacter)) {
1945               try {
1946                 scanNumber(false);
1947               } catch (InvalidInputException ex) {
1948               };
1949               break;
1950             }
1951         }
1952       }
1953       //-----------------end switch while try--------------------
1954     } catch (IndexOutOfBoundsException e) {
1955     } catch (InvalidInputException e) {
1956     }
1957     return;
1958   }
1959   //  public final boolean jumpOverUnicodeWhiteSpace()
1960   //    throws InvalidInputException {
1961   //    //BOOLEAN
1962   //    //handle the case of unicode. Jump over the next whiteSpace
1963   //    //making startPosition pointing on the next available char
1964   //    //On false, the currentCharacter is filled up with a potential
1965   //    //correct char
1966   //
1967   //    try {
1968   //      this.wasAcr = false;
1969   //      int c1, c2, c3, c4;
1970   //      int unicodeSize = 6;
1971   //      currentPosition++;
1972   //      while (source[currentPosition] == 'u') {
1973   //        currentPosition++;
1974   //        unicodeSize++;
1975   //      }
1976   //
1977   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1978   //        || c1 < 0)
1979   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1980   //          || c2 < 0)
1981   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1982   //          || c3 < 0)
1983   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1984   //          || c4 < 0)) {
1985   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1986   //      }
1987   //
1988   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1989   //      if (recordLineSeparator
1990   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1991   //        pushLineSeparator();
1992   //      if (Character.isWhitespace(currentCharacter))
1993   //        return true;
1994   //
1995   //      //buffer the new char which is not a white space
1996   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1997   //      //withoutUnicodePtr == 1 is true here
1998   //      return false;
1999   //    } catch (IndexOutOfBoundsException e) {
2000   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2001   //    }
2002   //  }
2003   public final int[] getLineEnds() {
2004     //return a bounded copy of this.lineEnds
         // Only the used part is copied: entries 0..linePtr (linePtr + 1 values).
         // The result is a freshly allocated array, so the internal lineEnds
         // buffer itself is never handed out to the caller.
2005
2006     int[] copy;
         // the destination array is allocated inline, as arraycopy's third argument
2007     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2008     return copy;
2009   }
2010
2011   public char[] getSource() {
         // Returns the scanner's own source array; no copy is made, so the caller
         // and the scanner share the same char[] instance.
2012     return this.source;
2013   }
2014   final char[] optimizedCurrentTokenSource1() {
2015     //return always the same char[] build only once
         // Returns a char[] for the single character at source[startPosition].
         // For 'a'..'z' the matching charArray_<letter> field is returned, so
         // repeated calls hand back the same array instance; any other character
         // gets a newly allocated one-element array.
         // NOTE(review): the charArray_<letter> fields are declared outside this
         // chunk; presumably each holds just that letter and must not be modified
         // by callers - confirm.
2016
2017     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2018     char charOne = source[startPosition];
2019     switch (charOne) {
2020       case 'a' :
2021         return charArray_a;
2022       case 'b' :
2023         return charArray_b;
2024       case 'c' :
2025         return charArray_c;
2026       case 'd' :
2027         return charArray_d;
2028       case 'e' :
2029         return charArray_e;
2030       case 'f' :
2031         return charArray_f;
2032       case 'g' :
2033         return charArray_g;
2034       case 'h' :
2035         return charArray_h;
2036       case 'i' :
2037         return charArray_i;
2038       case 'j' :
2039         return charArray_j;
2040       case 'k' :
2041         return charArray_k;
2042       case 'l' :
2043         return charArray_l;
2044       case 'm' :
2045         return charArray_m;
2046       case 'n' :
2047         return charArray_n;
2048       case 'o' :
2049         return charArray_o;
2050       case 'p' :
2051         return charArray_p;
2052       case 'q' :
2053         return charArray_q;
2054       case 'r' :
2055         return charArray_r;
2056       case 's' :
2057         return charArray_s;
2058       case 't' :
2059         return charArray_t;
2060       case 'u' :
2061         return charArray_u;
2062       case 'v' :
2063         return charArray_v;
2064       case 'w' :
2065         return charArray_w;
2066       case 'x' :
2067         return charArray_x;
2068       case 'y' :
2069         return charArray_y;
2070       case 'z' :
2071         return charArray_z;
2072       default :
             // anything outside 'a'..'z' is not cached: allocate on every call
2073         return new char[] { charOne };
2074     }
2075   }
2076
2077   final char[] optimizedCurrentTokenSource2() {
2078     //try to return the same char[] build only once
         // Looks up, and on a miss caches, a two-character array for the chars at
         // source[startPosition] and source[startPosition + 1].
         // The bucket is charArray_length[0][hash]; its slots
         // 0..InternalTableSize-1 are searched linearly in two passes.
         // NOTE(review): each slot is dereferenced without a null check, so the
         // buckets are presumably pre-filled elsewhere with arrays of length >= 2
         // - confirm.
2079
2080     char c0, c1;
         // c0 and c1 are assigned inside the hash expression itself
2081     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2082     char[][] table = charArray_length[0][hash];
         // first pass: slots newEntry2 + 1 .. InternalTableSize - 1
2083     int i = newEntry2;
2084     while (++i < InternalTableSize) {
2085       char[] charArray = table[i];
2086       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2087         return charArray;
2088     }
2089     //---------other side---------
         // second pass: slots 0 .. newEntry2
2090     i = -1;
2091     int max = newEntry2;
2092     while (++i <= max) {
2093       char[] charArray = table[i];
2094       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2095         return charArray;
2096     }
2097     //--------add the entry-------
         // Miss: use the slot after newEntry2 (wrapping to 0), overwriting whatever
         // it held, and remember it as the most recently written slot. newEntry2
         // is a single cursor used for every bucket, not one cursor per bucket.
2098     if (++max >= InternalTableSize)
2099       max = 0;
2100     char[] r;
2101     table[max] = (r = new char[] { c0, c1 });
2102     newEntry2 = max;
2103     return r;
2104   }
2105
2106   final char[] optimizedCurrentTokenSource3() {
2107     //try to return the same char[] build only once
         // Looks up, and on a miss caches, a three-character array for the chars
         // at source[startPosition .. startPosition + 2].
         // The bucket is charArray_length[1][hash]; its slots
         // 0..InternalTableSize-1 are searched linearly in two passes.
         // NOTE(review): each slot is dereferenced without a null check, so the
         // buckets are presumably pre-filled elsewhere with arrays of length >= 3
         // - confirm.
2108
2109     char c0, c1, c2;
         // c0..c2 are assigned inside the hash expression itself
2110     int hash =
2111       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2112         % TableSize;
2113     char[][] table = charArray_length[1][hash];
         // first pass: slots newEntry3 + 1 .. InternalTableSize - 1
2114     int i = newEntry3;
2115     while (++i < InternalTableSize) {
2116       char[] charArray = table[i];
2117       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2118         return charArray;
2119     }
2120     //---------other side---------
         // second pass: slots 0 .. newEntry3
2121     i = -1;
2122     int max = newEntry3;
2123     while (++i <= max) {
2124       char[] charArray = table[i];
2125       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2126         return charArray;
2127     }
2128     //--------add the entry-------
         // Miss: use the slot after newEntry3 (wrapping to 0), overwriting whatever
         // it held, and remember it as the most recently written slot. newEntry3
         // is a single cursor used for every bucket, not one cursor per bucket.
2129     if (++max >= InternalTableSize)
2130       max = 0;
2131     char[] r;
2132     table[max] = (r = new char[] { c0, c1, c2 });
2133     newEntry3 = max;
2134     return r;
2135   }
2136
2137   final char[] optimizedCurrentTokenSource4() {
2138     //try to return the same char[] build only once
         // Looks up, and on a miss caches, a four-character array for the chars
         // at source[startPosition .. startPosition + 3].
         // The bucket is charArray_length[2][hash]; its slots
         // 0..InternalTableSize-1 are searched linearly in two passes.
         // NOTE(review): each slot is dereferenced without a null check, so the
         // buckets are presumably pre-filled elsewhere with arrays of length >= 4
         // - confirm.
2139
2140     char c0, c1, c2, c3;
         // c0 is widened to long before its shift, so the sum and the modulo are
         // computed in long; the result is narrowed back to int for indexing.
2141     long hash =
2142       ((((long) (c0 = source[startPosition])) << 18)
2143         + ((c1 = source[startPosition + 1]) << 12)
2144         + ((c2 = source[startPosition + 2]) << 6)
2145         + (c3 = source[startPosition + 3]))
2146         % TableSize;
2147     char[][] table = charArray_length[2][(int) hash];
         // first pass: slots newEntry4 + 1 .. InternalTableSize - 1
2148     int i = newEntry4;
2149     while (++i < InternalTableSize) {
2150       char[] charArray = table[i];
2151       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2152         return charArray;
2153     }
2154     //---------other side---------
         // second pass: slots 0 .. newEntry4
2155     i = -1;
2156     int max = newEntry4;
2157     while (++i <= max) {
2158       char[] charArray = table[i];
2159       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2160         return charArray;
2161     }
2162     //--------add the entry-------
         // Miss: use the slot after newEntry4 (wrapping to 0), overwriting whatever
         // it held, and remember it as the most recently written slot. newEntry4
         // is a single cursor used for every bucket, not one cursor per bucket.
2163     if (++max >= InternalTableSize)
2164       max = 0;
2165     char[] r;
2166     table[max] = (r = new char[] { c0, c1, c2, c3 });
2167     newEntry4 = max;
2168     return r;
2169
2170   }
2171
2172   final char[] optimizedCurrentTokenSource5() {
2173     //try to return the same char[] build only once
         // Looks up, and on a miss caches, a five-character array for the chars
         // at source[startPosition .. startPosition + 4].
         // The bucket is charArray_length[3][hash]; its slots
         // 0..InternalTableSize-1 are searched linearly in two passes.
         // NOTE(review): each slot is dereferenced without a null check, so the
         // buckets are presumably pre-filled elsewhere with arrays of length >= 5
         // - confirm.
2174
2175     char c0, c1, c2, c3, c4;
         // c0 and c1 are widened to long before their shifts, so the sum and the
         // modulo are computed in long; the result is narrowed to int for indexing.
2176     long hash =
2177       ((((long) (c0 = source[startPosition])) << 24)
2178         + (((long) (c1 = source[startPosition + 1])) << 18)
2179         + ((c2 = source[startPosition + 2]) << 12)
2180         + ((c3 = source[startPosition + 3]) << 6)
2181         + (c4 = source[startPosition + 4]))
2182         % TableSize;
2183     char[][] table = charArray_length[3][(int) hash];
         // first pass: slots newEntry5 + 1 .. InternalTableSize - 1
2184     int i = newEntry5;
2185     while (++i < InternalTableSize) {
2186       char[] charArray = table[i];
2187       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2188         return charArray;
2189     }
2190     //---------other side---------
         // second pass: slots 0 .. newEntry5
2191     i = -1;
2192     int max = newEntry5;
2193     while (++i <= max) {
2194       char[] charArray = table[i];
2195       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2196         return charArray;
2197     }
2198     //--------add the entry-------
         // Miss: use the slot after newEntry5 (wrapping to 0), overwriting whatever
         // it held, and remember it as the most recently written slot. newEntry5
         // is a single cursor used for every bucket, not one cursor per bucket.
2199     if (++max >= InternalTableSize)
2200       max = 0;
2201     char[] r;
2202     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2203     newEntry5 = max;
2204     return r;
2205
2206   }
2207
2208   final char[] optimizedCurrentTokenSource6() {
2209     //try to return the same char[] build only once
         // Looks up, and on a miss caches, a six-character array for the chars
         // at source[startPosition .. startPosition + 5].
         // The bucket is charArray_length[4][hash]; its slots
         // 0..InternalTableSize-1 are searched linearly in two passes.
         // NOTE(review): each slot is dereferenced without a null check, so the
         // buckets are presumably pre-filled elsewhere with arrays of length >= 6
         // - confirm.
2210
2211     char c0, c1, c2, c3, c4, c5;
         // c0..c2 are widened to long before their shifts, so the sum and the
         // modulo are computed in long; the result is narrowed to int for indexing.
         // NOTE(review): the shifts are 32, 24, 18, 12, 6, 0 - the first one does
         // not follow the 6-bit step of the others. It only affects how keys are
         // spread over buckets, not which array is returned.
2212     long hash =
2213       ((((long) (c0 = source[startPosition])) << 32)
2214         + (((long) (c1 = source[startPosition + 1])) << 24)
2215         + (((long) (c2 = source[startPosition + 2])) << 18)
2216         + ((c3 = source[startPosition + 3]) << 12)
2217         + ((c4 = source[startPosition + 4]) << 6)
2218         + (c5 = source[startPosition + 5]))
2219         % TableSize;
2220     char[][] table = charArray_length[4][(int) hash];
         // first pass: slots newEntry6 + 1 .. InternalTableSize - 1
2221     int i = newEntry6;
2222     while (++i < InternalTableSize) {
2223       char[] charArray = table[i];
2224       if ((c0 == charArray[0])
2225         && (c1 == charArray[1])
2226         && (c2 == charArray[2])
2227         && (c3 == charArray[3])
2228         && (c4 == charArray[4])
2229         && (c5 == charArray[5]))
2230         return charArray;
2231     }
2232     //---------other side---------
         // second pass: slots 0 .. newEntry6
2233     i = -1;
2234     int max = newEntry6;
2235     while (++i <= max) {
2236       char[] charArray = table[i];
2237       if ((c0 == charArray[0])
2238         && (c1 == charArray[1])
2239         && (c2 == charArray[2])
2240         && (c3 == charArray[3])
2241         && (c4 == charArray[4])
2242         && (c5 == charArray[5]))
2243         return charArray;
2244     }
2245     //--------add the entry-------
         // Miss: use the slot after newEntry6 (wrapping to 0), overwriting whatever
         // it held, and remember it as the most recently written slot. newEntry6
         // is a single cursor used for every bucket, not one cursor per bucket.
2246     if (++max >= InternalTableSize)
2247       max = 0;
2248     char[] r;
2249     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2250     newEntry6 = max;
2251     return r;
2252   }
2253
2254   public final void pushLineSeparator() throws InvalidInputException {
2255     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
         // Records in lineEnds the position of the line terminator currently held
         // in currentCharacter ('\r' or '\n'). A '\r' immediately followed by '\n'
         // produces a single entry that points at the '\n'. For any other
         // character only the currentLine reset below takes place.
         // Nothing in this body throws InvalidInputException, although the
         // signature declares it.
         // INCREMENT is the number of slots added whenever lineEnds is full.
2256     final int INCREMENT = 250;
2257
2258     if (this.checkNonExternalizedStringLiterals) {
2259       // reinitialize the current line for non externalize strings purpose
2260       currentLine = null;
2261     }
2262     //currentCharacter is at position currentPosition-1
2263
2264     // cr 000D
2265     if (currentCharacter == '\r') {
2266       int separatorPos = currentPosition - 1;
           // ignore a position that is not beyond the last recorded line end
           // NOTE(review): the guard is linePtr > 0, so this check is bypassed
           // when linePtr is 0 - confirm that this is intended.
2267       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2268         return;
2269       //System.out.println("CR-" + separatorPos);
           // Append; a full array is detected through the exception. The ++linePtr
           // has already taken effect when the store fails, which is why the
           // handler can write to lineEnds[linePtr] after growing the array.
2270       try {
2271         lineEnds[++linePtr] = separatorPos;
2272       } catch (IndexOutOfBoundsException e) {
2273         //linePtr value is correct
2274         int oldLength = lineEnds.length;
2275         int[] old = lineEnds;
2276         lineEnds = new int[oldLength + INCREMENT];
2277         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2278         lineEnds[linePtr] = separatorPos;
2279       }
2280       // look-ahead for merged cr+lf
2281       try {
2282         if (source[currentPosition] == '\n') {
2283           //System.out.println("look-ahead LF-" + currentPosition);
               // move the entry just written onto the '\n' and consume the '\n'
2284           lineEnds[linePtr] = currentPosition;
2285           currentPosition++;
2286           wasAcr = false;
2287         } else {
               // lone '\r'; the flag is consulted by the '\n' branch below
2288           wasAcr = true;
2289         }
2290       } catch (IndexOutOfBoundsException e) {
             // the look-ahead read past the end of source
2291         wasAcr = true;
2292       }
2293     } else {
2294       // lf 000A
2295       if (currentCharacter == '\n') {
2296         //must merge eventual cr followed by lf
             // merge only when the last recorded line end is the character
             // directly in front of this '\n' (position currentPosition - 2)
2297         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2298           //System.out.println("merge LF-" + (currentPosition - 1));
2299           lineEnds[linePtr] = currentPosition - 1;
2300         } else {
2301           int separatorPos = currentPosition - 1;
               // same duplicate guard and exception-driven append as in the
               // '\r' branch above
2302           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2303             return;
2304           // System.out.println("LF-" + separatorPos);
2305           try {
2306             lineEnds[++linePtr] = separatorPos;
2307           } catch (IndexOutOfBoundsException e) {
2308             //linePtr value is correct
2309             int oldLength = lineEnds.length;
2310             int[] old = lineEnds;
2311             lineEnds = new int[oldLength + INCREMENT];
2312             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2313             lineEnds[linePtr] = separatorPos;
2314           }
2315         }
2316         wasAcr = false;
2317       }
2318     }
2319   }
2320   public final void pushUnicodeLineSeparator() {
2321     // isUnicode means that the \r or \n has been read as a unicode character
         // Same bookkeeping as pushLineSeparator(), but the recorded positions are
         // taken relative to currentPosition - 6 instead of currentPosition - 1.
         // NOTE(review): the offset of 6 presumably is the length of a backslash-u
         // escape with four hex digits; escapes written with several 'u'
         // characters would be longer - confirm against the callers.
2322
2323     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2324
         // number of slots added whenever lineEnds is full
2325     final int INCREMENT = 250;
2326     //currentCharacter is at position currentPosition-1
2327
2328     if (this.checkNonExternalizedStringLiterals) {
2329       // reinitialize the current line for non externalize strings purpose
2330       currentLine = null;
2331     }
2332
2333     // cr 000D
2334     if (currentCharacter == '\r') {
2335       int separatorPos = currentPosition - 6;
           // ignore a position that is not beyond the last recorded line end
           // (not applied when linePtr is 0, because the guard is linePtr > 0)
2336       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2337         return;
2338       //System.out.println("CR-" + separatorPos);
           // Append; a full array is detected through the exception. The ++linePtr
           // has already taken effect when the store fails, which is why the
           // handler can write to lineEnds[linePtr] after growing the array.
2339       try {
2340         lineEnds[++linePtr] = separatorPos;
2341       } catch (IndexOutOfBoundsException e) {
2342         //linePtr value is correct
2343         int oldLength = lineEnds.length;
2344         int[] old = lineEnds;
2345         lineEnds = new int[oldLength + INCREMENT];
2346         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2347         lineEnds[linePtr] = separatorPos;
2348       }
2349       // look-ahead for merged cr+lf
           // NOTE(review): unlike pushLineSeparator(), this read is not wrapped in
           // a try/catch, so an IndexOutOfBoundsException escapes from here when
           // currentPosition is past the end of source - confirm that every
           // caller handles it.
2350       if (source[currentPosition] == '\n') {
2351         //System.out.println("look-ahead LF-" + currentPosition);
             // move the entry just written onto the '\n' and consume the '\n'
2352         lineEnds[linePtr] = currentPosition;
2353         currentPosition++;
2354         wasAcr = false;
2355       } else {
             // lone '\r'; the flag is consulted by the '\n' branch below
2356         wasAcr = true;
2357       }
2358     } else {
2359       // lf 000A
2360       if (currentCharacter == '\n') {
2361         //must merge eventual cr followed by lf
             // merge only when the last recorded line end equals
             // currentPosition - 7, i.e. one position before this separator
2362         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2363           //System.out.println("merge LF-" + (currentPosition - 1));
2364           lineEnds[linePtr] = currentPosition - 6;
2365         } else {
2366           int separatorPos = currentPosition - 6;
               // same duplicate guard and exception-driven append as in the
               // '\r' branch above
2367           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2368             return;
2369           // System.out.println("LF-" + separatorPos);
2370           try {
2371             lineEnds[++linePtr] = separatorPos;
2372           } catch (IndexOutOfBoundsException e) {
2373             //linePtr value is correct
2374             int oldLength = lineEnds.length;
2375             int[] old = lineEnds;
2376             lineEnds = new int[oldLength + INCREMENT];
2377             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2378             lineEnds[linePtr] = separatorPos;
2379           }
2380         }
2381         wasAcr = false;
2382       }
2383     }
2384   }
2385   public final void recordComment(boolean isJavadoc) {
         // Pushes one comment onto the parallel arrays commentStops/commentStarts:
         // the stop is currentPosition, stored as-is when isJavadoc is true and
         // negated otherwise; the start is startPosition.
2386
2387     // a new annotation comment is recorded
         // A full commentStops array is detected through the exception; commentPtr
         // has already been incremented when the store fails, so the handler can
         // reuse it as the index after growing.
2388     try {
2389       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2390     } catch (IndexOutOfBoundsException e) {
2391       int oldStackLength = commentStops.length;
2392       int[] oldStack = commentStops;
2393       commentStops = new int[oldStackLength + 30];
2394       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2395       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2396       //grows the positions buffers too
           // NOTE(review): commentStarts is resized and copied using the old length
           // of commentStops, which assumes both arrays always have the same
           // length - confirm where they are allocated.
2397       int[] old = commentStarts;
2398       commentStarts = new int[oldStackLength + 30];
2399       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2400     }
2401
2402     //the buffer is of a correct size here
2403     commentStarts[commentPtr] = startPosition;
2404   }
2405   public void resetTo(int begin, int end) {
2406     //reset the scanner to a given position where it may rescan again
         // begin becomes the initial, start and current position; eofPosition is
         // set one past end, which suggests end is meant as an inclusive index.
2407
2408     diet = false;
2409     initialPosition = startPosition = currentPosition = begin;
         // end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as-is
2410     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2411     commentPtr = -1; // reset comment stack
2412   }
2413
2414   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2415     // the string with "\\u" is a legal string of two chars \ and u
2416     //thus we use a direct access to the source (for regular cases).
         // Reads the character at currentPosition and leaves the result in
         // currentCharacter: a quote or a backslash is consumed and returned
         // as-is; for anything else currentCharacter becomes a backslash and the
         // character is pushed back (currentPosition is decremented).
         // NOTE(review): presumably called right after a backslash was read inside
         // a single-quoted string - confirm against the callers.
         // Nothing in this body throws InvalidInputException; the source read is
         // not bounds-checked here.
2417
2418     //    if (unicodeAsBackSlash) {
2419     //      // consume next character
2420     //      unicodeAsBackSlash = false;
2421     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2422     //        && (source[currentPosition] == 'u')) {
2423     //        getNextUnicodeChar();
2424     //      } else {
2425     //        if (withoutUnicodePtr != 0) {
2426     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2427     //        }
2428     //      }
2429     //    } else
2430     currentCharacter = source[currentPosition++];
2431     switch (currentCharacter) {
2432       case '\'' :
2433         currentCharacter = '\'';
2434         break;
2435       case '\\' :
2436         currentCharacter = '\\';
2437         break;
2438       default :
             // not a recognised escape: report a backslash and un-read the character
2439         currentCharacter = '\\';
2440         currentPosition--;
2441     }
2442   }
2443
2444   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2445     // the string with "\\u" is a legal string of two chars \ and u
2446     //thus we use a direct access to the source (for regular cases).
         // Reads the character at currentPosition and leaves the decoded value in
         // currentCharacter:
         //  - t, n and r become tab, line feed and carriage return;
         //  - double quote, single quote, backslash and '$' stand for themselves;
         //  - an octal digit starts an octal escape of up to three digits;
         //  - any other character is left in currentCharacter unchanged (the
         //    throw for that case is commented out at the end of the switch).
         // NOTE(review): presumably called right after a backslash was read inside
         // a double-quoted string - confirm against the callers.
         // The source reads are not bounds-checked here.
2447
2448     //    if (unicodeAsBackSlash) {
2449     //      // consume next character
2450     //      unicodeAsBackSlash = false;
2451     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2452     //        && (source[currentPosition] == 'u')) {
2453     //        getNextUnicodeChar();
2454     //      } else {
2455     //        if (withoutUnicodePtr != 0) {
2456     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2457     //        }
2458     //      }
2459     //    } else
2460     currentCharacter = source[currentPosition++];
2461     switch (currentCharacter) {
2462       //      case 'b' :
2463       //        currentCharacter = '\b';
2464       //        break;
2465       case 't' :
2466         currentCharacter = '\t';
2467         break;
2468       case 'n' :
2469         currentCharacter = '\n';
2470         break;
2471         //      case 'f' :
2472         //        currentCharacter = '\f';
2473         //        break;
2474       case 'r' :
2475         currentCharacter = '\r';
2476         break;
2477       case '\"' :
2478         currentCharacter = '\"';
2479         break;
2480       case '\'' :
2481         currentCharacter = '\'';
2482         break;
2483       case '\\' :
2484         currentCharacter = '\\';
2485         break;
2486       case '$' :
2487         currentCharacter = '$';
2488         break;
2489       default :
2490         // -----------octal escape--------------
2491         // OctalDigit
2492         // OctalDigit OctalDigit
2493         // ZeroToThree OctalDigit OctalDigit
2494
2495         int number = Character.getNumericValue(currentCharacter);
2496         if (number >= 0 && number <= 7) {
               // a third digit is only accepted when the first digit is 0..3
2497           boolean zeroToThreeNot = number > 3;
               // Each look-ahead below consumes one character; whenever that
               // character turns out not to belong to the escape it is pushed
               // back with currentPosition--.
2498           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2499             int digit = Character.getNumericValue(currentCharacter);
2500             if (digit >= 0 && digit <= 7) {
2501               number = (number * 8) + digit;
2502               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2504                   currentPosition--;
2505                 } else {
2506                   digit = Character.getNumericValue(currentCharacter);
2507                   if (digit >= 0 && digit <= 7) {
2508                     // has read \ZeroToThree OctalDigit OctalDigit
2509                     number = (number * 8) + digit;
2510                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2511                     currentPosition--;
2512                   }
2513                 }
2514               } else { // has read \OctalDigit NonDigit--> ignore last character
2515                 currentPosition--;
2516               }
2517             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2518               currentPosition--;
2519             }
2520           } else { // has read \OctalDigit --> ignore last character
2521             currentPosition--;
2522           }
               // NOTE(review): this check appears unreachable - three digits are
               // only combined when the first is 0..3 (at most 3*64 + 7*8 + 7 =
               // 255) and two digits give at most 7*8 + 7 = 63.
2523           if (number > 255)
2524             throw new InvalidInputException(INVALID_ESCAPE);
               // the look-ahead characters stored above are replaced by the value
2525           currentCharacter = (char) number;
2526         }
2527         //else
2528         //     throw new InvalidInputException(INVALID_ESCAPE);
2529     }
2530   }
2531
2532   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2533   //    return scanIdentifierOrKeyword( false );
2534   //  }
2535
2536   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2537     //test keywords
2538
2539     //first dispatch on the first char.
2540     //then the length. If there are several
2541     //keywors with the same length AND the same first char, then do another
2542     //disptach on the second char :-)...cool....but fast !
2543
2544     useAssertAsAnIndentifier = false;
2545
2546     while (getNextCharAsJavaIdentifierPart()) {
2547     };
2548
2549     if (isVariable) {
2550       if (new String(getCurrentTokenSource()).equals("$this")) {
2551         return TokenNamethis;
2552       }
2553       return TokenNameVariable;
2554     }
2555     int index, length;
2556     char[] data;
2557     char firstLetter;
2558     //    if (withoutUnicodePtr == 0)
2559
2560     //quick test on length == 1 but not on length > 12 while most identifier
2561     //have a length which is <= 12...but there are lots of identifier with
2562     //only one char....
2563
2564     //      {
2565     if ((length = currentPosition - startPosition) == 1)
2566       return TokenNameIdentifier;
2567     //  data = source;
2568     data = new char[length];
2569     index = startPosition;
2570     for (int i = 0; i < length; i++) {
2571       data[i] = Character.toLowerCase(source[index + i]);
2572     }
2573     index = 0;
2574     //    } else {
2575     //      if ((length = withoutUnicodePtr) == 1)
2576     //        return TokenNameIdentifier;
2577     //      // data = withoutUnicodeBuffer;
2578     //      data = new char[withoutUnicodeBuffer.length];
2579     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2580     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2581     //      }
2582     //      index = 1;
2583     //    }
2584
2585     firstLetter = data[index];
2586     switch (firstLetter) {
2587
2588       case 'a' : // as and array
2589         switch (length) {
2590           case 2 : //as
2591             if ((data[++index] == 's')) {
2592               return TokenNameas;
2593             } else {
2594               return TokenNameIdentifier;
2595             }
2596           case 3 : //and
2597             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2598               return TokenNameAND;
2599             } else {
2600               return TokenNameIdentifier;
2601             }
2602             //          case 5 :
2603             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2604             //              return TokenNamearray;
2605             //            else
2606             //              return TokenNameIdentifier;
2607           default :
2608             return TokenNameIdentifier;
2609         }
2610       case 'b' : //break
2611         switch (length) {
2612           case 5 :
2613             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2614               return TokenNamebreak;
2615             else
2616               return TokenNameIdentifier;
2617           default :
2618             return TokenNameIdentifier;
2619         }
2620
2621       case 'c' : //case class continue
2622         switch (length) {
2623           case 4 :
2624             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2625               return TokenNamecase;
2626             else
2627               return TokenNameIdentifier;
2628           case 5 :
2629             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2630               return TokenNameclass;
2631             else
2632               return TokenNameIdentifier;
2633           case 8 :
2634             if ((data[++index] == 'o')
2635               && (data[++index] == 'n')
2636               && (data[++index] == 't')
2637               && (data[++index] == 'i')
2638               && (data[++index] == 'n')
2639               && (data[++index] == 'u')
2640               && (data[++index] == 'e'))
2641               return TokenNamecontinue;
2642             else
2643               return TokenNameIdentifier;
2644           default :
2645             return TokenNameIdentifier;
2646         }
2647
2648       case 'd' : //define default do
2649         switch (length) {
2650           case 2 :
2651             if ((data[++index] == 'o'))
2652               return TokenNamedo;
2653             else
2654               return TokenNameIdentifier;
2655           case 6 :
2656             if ((data[++index] == 'e')
2657               && (data[++index] == 'f')
2658               && (data[++index] == 'i')
2659               && (data[++index] == 'n')
2660               && (data[++index] == 'e'))
2661               return TokenNamedefine;
2662             else
2663               return TokenNameIdentifier;
2664           case 7 :
2665             if ((data[++index] == 'e')
2666               && (data[++index] == 'f')
2667               && (data[++index] == 'a')
2668               && (data[++index] == 'u')
2669               && (data[++index] == 'l')
2670               && (data[++index] == 't'))
2671               return TokenNamedefault;
2672             else
2673               return TokenNameIdentifier;
2674           default :
2675             return TokenNameIdentifier;
2676         }
2677       case 'e' : //echo else elseif extends
2678         switch (length) {
2679           case 4 :
2680             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2681               return TokenNameecho;
2682             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2683               return TokenNameelse;
2684             else
2685               return TokenNameIdentifier;
2686           case 5 : // endif
2687             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2688               return TokenNameendif;
2689             else
2690               return TokenNameIdentifier;
2691           case 6 : // endfor
2692             if ((data[++index] == 'n')
2693               && (data[++index] == 'd')
2694               && (data[++index] == 'f')
2695               && (data[++index] == 'o')
2696               && (data[++index] == 'r'))
2697               return TokenNameendfor;
2698             else if (
2699               (data[index] == 'l')
2700                 && (data[++index] == 's')
2701                 && (data[++index] == 'e')
2702                 && (data[++index] == 'i')
2703                 && (data[++index] == 'f'))
2704               return TokenNameelseif;
2705             else
2706               return TokenNameIdentifier;
2707           case 7 :
2708             if ((data[++index] == 'x')
2709               && (data[++index] == 't')
2710               && (data[++index] == 'e')
2711               && (data[++index] == 'n')
2712               && (data[++index] == 'd')
2713               && (data[++index] == 's'))
2714               return TokenNameextends;
2715             else
2716               return TokenNameIdentifier;
2717           case 8 : // endwhile
2718             if ((data[++index] == 'n')
2719               && (data[++index] == 'd')
2720               && (data[++index] == 'w')
2721               && (data[++index] == 'h')
2722               && (data[++index] == 'i')
2723               && (data[++index] == 'l')
2724               && (data[++index] == 'e'))
2725               return TokenNameendwhile;
2726             else
2727               return TokenNameIdentifier;
2728           case 9 : // endswitch
2729             if ((data[++index] == 'n')
2730               && (data[++index] == 'd')
2731               && (data[++index] == 's')
2732               && (data[++index] == 'w')
2733               && (data[++index] == 'i')
2734               && (data[++index] == 't')
2735               && (data[++index] == 'c')
2736               && (data[++index] == 'h'))
2737               return TokenNameendswitch;
2738             else
2739               return TokenNameIdentifier;
2740           case 10 : // endforeach
2741             if ((data[++index] == 'n')
2742               && (data[++index] == 'd')
2743               && (data[++index] == 'f')
2744               && (data[++index] == 'o')
2745               && (data[++index] == 'r')
2746               && (data[++index] == 'e')
2747               && (data[++index] == 'a')
2748               && (data[++index] == 'c')
2749               && (data[++index] == 'h'))
2750               return TokenNameendforeach;
2751             else
2752               return TokenNameIdentifier;
2753
2754           default :
2755             return TokenNameIdentifier;
2756         }
2757
2758       case 'f' : //for false function
2759         switch (length) {
2760           case 3 :
2761             if ((data[++index] == 'o') && (data[++index] == 'r'))
2762               return TokenNamefor;
2763             else
2764               return TokenNameIdentifier;
2765           case 5 :
2766             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2767               return TokenNamefalse;
2768             else
2769               return TokenNameIdentifier;
2770           case 7 : // function
2771             if ((data[++index] == 'o')
2772               && (data[++index] == 'r')
2773               && (data[++index] == 'e')
2774               && (data[++index] == 'a')
2775               && (data[++index] == 'c')
2776               && (data[++index] == 'h'))
2777               return TokenNameforeach;
2778             else
2779               return TokenNameIdentifier;
2780           case 8 : // function
2781             if ((data[++index] == 'u')
2782               && (data[++index] == 'n')
2783               && (data[++index] == 'c')
2784               && (data[++index] == 't')
2785               && (data[++index] == 'i')
2786               && (data[++index] == 'o')
2787               && (data[++index] == 'n'))
2788               return TokenNamefunction;
2789             else
2790               return TokenNameIdentifier;
2791           default :
2792             return TokenNameIdentifier;
2793         }
2794       case 'g' : //global
2795         if (length == 6) {
2796           if ((data[++index] == 'l')
2797             && (data[++index] == 'o')
2798             && (data[++index] == 'b')
2799             && (data[++index] == 'a')
2800             && (data[++index] == 'l')) {
2801             return TokenNameglobal;
2802           }
2803         }
2804         return TokenNameIdentifier;
2805
2806       case 'i' : //if int
2807         switch (length) {
2808           case 2 :
2809             if (data[++index] == 'f')
2810               return TokenNameif;
2811             else
2812               return TokenNameIdentifier;
2813             //          case 3 :
2814             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2815             //              return TokenNameint;
2816             //            else
2817             //              return TokenNameIdentifier;
2818           case 7 :
2819             if ((data[++index] == 'n')
2820               && (data[++index] == 'c')
2821               && (data[++index] == 'l')
2822               && (data[++index] == 'u')
2823               && (data[++index] == 'd')
2824               && (data[++index] == 'e'))
2825               return TokenNameinclude;
2826             else
2827               return TokenNameIdentifier;
2828           case 12 :
2829             if ((data[++index] == 'n')
2830               && (data[++index] == 'c')
2831               && (data[++index] == 'l')
2832               && (data[++index] == 'u')
2833               && (data[++index] == 'd')
2834               && (data[++index] == 'e')
2835               && (data[++index] == '_')
2836               && (data[++index] == 'o')
2837               && (data[++index] == 'n')
2838               && (data[++index] == 'c')
2839               && (data[++index] == 'e'))
2840               return TokenNameinclude_once;
2841             else
2842               return TokenNameIdentifier;
2843           default :
2844             return TokenNameIdentifier;
2845         }
2846
2847       case 'l' : //list
2848         if (length == 4) {
2849           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2850             return TokenNamelist;
2851           }
2852         }
2853         return TokenNameIdentifier;
2854
2855       case 'n' : // new null
2856         switch (length) {
2857           case 3 :
2858             if ((data[++index] == 'e') && (data[++index] == 'w'))
2859               return TokenNamenew;
2860             else
2861               return TokenNameIdentifier;
2862           case 4 :
2863             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2864               return TokenNamenull;
2865             else
2866               return TokenNameIdentifier;
2867
2868           default :
2869             return TokenNameIdentifier;
2870         }
2871       case 'o' : // or old_function
2872         if (length == 2) {
2873           if (data[++index] == 'r') {
2874             return TokenNameOR;
2875           }
2876         }
2877         //        if (length == 12) {
2878         //          if ((data[++index] == 'l')
2879         //            && (data[++index] == 'd')
2880         //            && (data[++index] == '_')
2881         //            && (data[++index] == 'f')
2882         //            && (data[++index] == 'u')
2883         //            && (data[++index] == 'n')
2884         //            && (data[++index] == 'c')
2885         //            && (data[++index] == 't')
2886         //            && (data[++index] == 'i')
2887         //            && (data[++index] == 'o')
2888         //            && (data[++index] == 'n')) {
2889         //            return TokenNameold_function;
2890         //          }
2891         //        }
2892         return TokenNameIdentifier;
2893
2894       case 'p' : // print
2895         if (length == 5) {
2896           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2897             return TokenNameprint;
2898           }
2899         }
2900         return TokenNameIdentifier;
2901       case 'r' : //return require require_once
2902         if (length == 6) {
2903           if ((data[++index] == 'e')
2904             && (data[++index] == 't')
2905             && (data[++index] == 'u')
2906             && (data[++index] == 'r')
2907             && (data[++index] == 'n')) {
2908             return TokenNamereturn;
2909           }
2910         } else if (length == 7) {
2911           if ((data[++index] == 'e')
2912             && (data[++index] == 'q')
2913             && (data[++index] == 'u')
2914             && (data[++index] == 'i')
2915             && (data[++index] == 'r')
2916             && (data[++index] == 'e')) {
2917             return TokenNamerequire;
2918           }
2919         } else if (length == 12) {
2920           if ((data[++index] == 'e')
2921             && (data[++index] == 'q')
2922             && (data[++index] == 'u')
2923             && (data[++index] == 'i')
2924             && (data[++index] == 'r')
2925             && (data[++index] == 'e')
2926             && (data[++index] == '_')
2927             && (data[++index] == 'o')
2928             && (data[++index] == 'n')
2929             && (data[++index] == 'c')
2930             && (data[++index] == 'e')) {
2931             return TokenNamerequire_once;
2932           }
2933         } else
2934           return TokenNameIdentifier;
2935
2936       case 's' : //static switch
2937         switch (length) {
2938           case 6 :
2939             if (data[++index] == 't')
2940               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2941                 return TokenNamestatic;
2942               } else
2943                 return TokenNameIdentifier;
2944             else if (
2945               (data[index] == 'w')
2946                 && (data[++index] == 'i')
2947                 && (data[++index] == 't')
2948                 && (data[++index] == 'c')
2949                 && (data[++index] == 'h'))
2950               return TokenNameswitch;
2951             else
2952               return TokenNameIdentifier;
2953           default :
2954             return TokenNameIdentifier;
2955         }
2956
2957       case 't' : // true
2958         switch (length) {
2959
2960           case 4 :
2961             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2962               return TokenNametrue;
2963             else
2964               return TokenNameIdentifier;
2965             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2966             //              return TokenNamethis;
2967
2968           default :
2969             return TokenNameIdentifier;
2970         }
2971
2972       case 'v' : //var
2973         switch (length) {
2974           case 3 :
2975             if ((data[++index] == 'a') && (data[++index] == 'r'))
2976               return TokenNamevar;
2977             else
2978               return TokenNameIdentifier;
2979
2980           default :
2981             return TokenNameIdentifier;
2982         }
2983
2984       case 'w' : //while
2985         switch (length) {
2986           case 5 :
2987             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2988               return TokenNamewhile;
2989             else
2990               return TokenNameIdentifier;
2991             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2992             //return TokenNamewidefp ;
2993             //else
2994             //return TokenNameIdentifier;
2995           default :
2996             return TokenNameIdentifier;
2997         }
2998
2999       case 'x' : //xor
3000         switch (length) {
3001           case 3 :
3002             if ((data[++index] == 'o') && (data[++index] == 'r'))
3003               return TokenNameXOR;
3004             else
3005               return TokenNameIdentifier;
3006
3007           default :
3008             return TokenNameIdentifier;
3009         }
3010       default :
3011         return TokenNameIdentifier;
3012     }
3013   }
3014   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3015
3016     //when entering this method the currentCharacter is the firt
3017     //digit of the number , i.e. it may be preceeded by a . when
3018     //dotPrefix is true
3019
3020     boolean floating = dotPrefix;
3021     if ((!dotPrefix) && (currentCharacter == '0')) {
3022       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3023         //force the first char of the hexa number do exist...
3024         // consume next character
3025         unicodeAsBackSlash = false;
3026         currentCharacter = source[currentPosition++];
3027         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3028         //          && (source[currentPosition] == 'u')) {
3029         //          getNextUnicodeChar();
3030         //        } else {
3031         //          if (withoutUnicodePtr != 0) {
3032         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3033         //          }
3034         //        }
3035         if (Character.digit(currentCharacter, 16) == -1)
3036           throw new InvalidInputException(INVALID_HEXA);
3037         //---end forcing--
3038         while (getNextCharAsDigit(16)) {
3039         };
3040         //        if (getNextChar('l', 'L') >= 0)
3041         //          return TokenNameLongLiteral;
3042         //        else
3043         return TokenNameIntegerLiteral;
3044       }
3045
3046       //there is x or X in the number
3047       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3048       if (getNextCharAsDigit()) {
3049         //-------------potential octal-----------------
3050         while (getNextCharAsDigit()) {
3051         };
3052
3053         //        if (getNextChar('l', 'L') >= 0) {
3054         //          return TokenNameLongLiteral;
3055         //        }
3056         //
3057         //        if (getNextChar('f', 'F') >= 0) {
3058         //          return TokenNameFloatingPointLiteral;
3059         //        }
3060
3061         if (getNextChar('d', 'D') >= 0) {
3062           return TokenNameDoubleLiteral;
3063         } else { //make the distinction between octal and float ....
3064           if (getNextChar('.')) { //bingo ! ....
3065             while (getNextCharAsDigit()) {
3066             };
3067             if (getNextChar('e', 'E') >= 0) {
3068               // consume next character
3069               unicodeAsBackSlash = false;
3070               currentCharacter = source[currentPosition++];
3071               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3072               //                && (source[currentPosition] == 'u')) {
3073               //                getNextUnicodeChar();
3074               //              } else {
3075               //                if (withoutUnicodePtr != 0) {
3076               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3077               //                }
3078               //              }
3079
3080               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3081                 // consume next character
3082                 unicodeAsBackSlash = false;
3083                 currentCharacter = source[currentPosition++];
3084                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3085                 //                  && (source[currentPosition] == 'u')) {
3086                 //                  getNextUnicodeChar();
3087                 //                } else {
3088                 //                  if (withoutUnicodePtr != 0) {
3089                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3090                 //                      currentCharacter;
3091                 //                  }
3092                 //                }
3093               }
3094               if (!Character.isDigit(currentCharacter))
3095                 throw new InvalidInputException(INVALID_FLOAT);
3096               while (getNextCharAsDigit()) {
3097               };
3098             }
3099             //            if (getNextChar('f', 'F') >= 0)
3100             //              return TokenNameFloatingPointLiteral;
3101             getNextChar('d', 'D'); //jump over potential d or D
3102             return TokenNameDoubleLiteral;
3103           } else {
3104             return TokenNameIntegerLiteral;
3105           }
3106         }
3107       } else {
3108         /* carry on */
3109       }
3110     }
3111
3112     while (getNextCharAsDigit()) {
3113     };
3114
3115     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3116     //      return TokenNameLongLiteral;
3117
3118     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3119       while (getNextCharAsDigit()) {
3120       };
3121       floating = true;
3122     }
3123
3124     //if floating is true both exponant and suffix may be optional
3125
3126     if (getNextChar('e', 'E') >= 0) {
3127       floating = true;
3128       // consume next character
3129       unicodeAsBackSlash = false;
3130       currentCharacter = source[currentPosition++];
3131       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3132       //        && (source[currentPosition] == 'u')) {
3133       //        getNextUnicodeChar();
3134       //      } else {
3135       //        if (withoutUnicodePtr != 0) {
3136       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3137       //        }
3138       //      }
3139
3140       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3141         unicodeAsBackSlash = false;
3142         currentCharacter = source[currentPosition++];
3143         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3144         //          && (source[currentPosition] == 'u')) {
3145         //          getNextUnicodeChar();
3146         //        } else {
3147         //          if (withoutUnicodePtr != 0) {
3148         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3149         //          }
3150         //        }
3151       }
3152       if (!Character.isDigit(currentCharacter))
3153         throw new InvalidInputException(INVALID_FLOAT);
3154       while (getNextCharAsDigit()) {
3155       };
3156     }
3157
3158     if (getNextChar('d', 'D') >= 0)
3159       return TokenNameDoubleLiteral;
3160     //    if (getNextChar('f', 'F') >= 0)
3161     //      return TokenNameFloatingPointLiteral;
3162
3163     //the long flag has been tested before
3164
3165     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3166   }
3167   /**
3168    * Search the line number corresponding to a specific position
3169    *
3170    */
3171   public final int getLineNumber(int position) {
3172
3173     if (lineEnds == null)
3174       return 1;
3175     int length = linePtr + 1;
3176     if (length == 0)
3177       return 1;
3178     int g = 0, d = length - 1;
3179     int m = 0;
3180     while (g <= d) {
3181       m = (g + d) / 2;
3182       if (position < lineEnds[m]) {
3183         d = m - 1;
3184       } else if (position > lineEnds[m]) {
3185         g = m + 1;
3186       } else {
3187         return m + 1;
3188       }
3189     }
3190     if (position < lineEnds[m]) {
3191       return m + 1;
3192     }
3193     return m + 2;
3194   }
3195
3196   public void setPHPMode(boolean mode) {
3197     phpMode = mode;
3198   }
3199
3200   public final void setSource(char[] source) {
3201     //the source-buffer is set to sourceString
3202
3203     if (source == null) {
3204       this.source = new char[0];
3205     } else {
3206       this.source = source;
3207     }
3208     startPosition = -1;
3209     initialPosition = currentPosition = 0;
3210     containsAssertKeyword = false;
3211     withoutUnicodeBuffer = new char[this.source.length];
3212
3213   }
3214
3215   public String toString() {
3216     if (startPosition == source.length)
3217       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3218     if (currentPosition > source.length)
3219       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3220
3221     char front[] = new char[startPosition];
3222     System.arraycopy(source, 0, front, 0, startPosition);
3223
3224     int middleLength = (currentPosition - 1) - startPosition + 1;
3225     char middle[];
3226     if (middleLength > -1) {
3227       middle = new char[middleLength];
3228       System.arraycopy(source, startPosition, middle, 0, middleLength);
3229     } else {
3230       middle = new char[0];
3231     }
3232
3233     char end[] = new char[source.length - (currentPosition - 1)];
3234     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3235
3236     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3237     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3238     + new String(end);
3239   }
3240   public final String toStringAction(int act) {
3241
3242     switch (act) {
3243       case TokenNameERROR :
3244         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3245       case TokenNameStopPHP :
3246         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3247       case TokenNameIdentifier :
3248         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3249       case TokenNameVariable :
3250         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3251       case TokenNameas :
3252         return "as"; //$NON-NLS-1$
3253       case TokenNamebreak :
3254         return "break"; //$NON-NLS-1$
3255       case TokenNamecase :
3256         return "case"; //$NON-NLS-1$
3257       case TokenNameclass :
3258         return "class"; //$NON-NLS-1$
3259       case TokenNamecontinue :
3260         return "continue"; //$NON-NLS-1$
3261       case TokenNamedefault :
3262         return "default"; //$NON-NLS-1$
3263       case TokenNamedefine :
3264         return "define"; //$NON-NLS-1$
3265       case TokenNamedo :
3266         return "do"; //$NON-NLS-1$
3267       case TokenNameecho :
3268         return "echo"; //$NON-NLS-1$
3269       case TokenNameelse :
3270         return "else"; //$NON-NLS-1$
3271       case TokenNameelseif :
3272         return "elseif"; //$NON-NLS-1$
3273       case TokenNameendfor :
3274         return "endfor"; //$NON-NLS-1$
3275       case TokenNameendforeach :
3276         return "endforeach"; //$NON-NLS-1$
3277       case TokenNameendif :
3278         return "endif"; //$NON-NLS-1$
3279       case TokenNameendswitch :
3280         return "endswitch"; //$NON-NLS-1$
3281       case TokenNameendwhile :
3282         return "endwhile"; //$NON-NLS-1$
3283       case TokenNameextends :
3284         return "extends"; //$NON-NLS-1$
3285       case TokenNamefalse :
3286         return "false"; //$NON-NLS-1$
3287       case TokenNamefor :
3288         return "for"; //$NON-NLS-1$
3289       case TokenNameforeach :
3290         return "foreach"; //$NON-NLS-1$
3291       case TokenNamefunction :
3292         return "function"; //$NON-NLS-1$
3293       case TokenNameglobal :
3294         return "global"; //$NON-NLS-1$
3295       case TokenNameif :
3296         return "if"; //$NON-NLS-1$
3297       case TokenNameinclude :
3298         return "include"; //$NON-NLS-1$
3299       case TokenNameinclude_once :
3300         return "include_once"; //$NON-NLS-1$
3301       case TokenNamelist :
3302         return "list"; //$NON-NLS-1$
3303       case TokenNamenew :
3304         return "new"; //$NON-NLS-1$
3305       case TokenNamenull :
3306         return "null"; //$NON-NLS-1$
3307       case TokenNameprint :
3308         return "print"; //$NON-NLS-1$
3309       case TokenNamerequire :
3310         return "require"; //$NON-NLS-1$
3311       case TokenNamerequire_once :
3312         return "require_once"; //$NON-NLS-1$
3313       case TokenNamereturn :
3314         return "return"; //$NON-NLS-1$
3315       case TokenNamestatic :
3316         return "static"; //$NON-NLS-1$
3317       case TokenNameswitch :
3318         return "switch"; //$NON-NLS-1$
3319       case TokenNametrue :
3320         return "true"; //$NON-NLS-1$
3321       case TokenNamevar :
3322         return "var"; //$NON-NLS-1$
3323       case TokenNamewhile :
3324         return "while"; //$NON-NLS-1$
3325       case TokenNamethis :
3326         return "$this"; //$NON-NLS-1$
3327       case TokenNameIntegerLiteral :
3328         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3329       case TokenNameDoubleLiteral :
3330         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3331       case TokenNameStringLiteral :
3332         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3333       case TokenNameStringConstant :
3334         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3335       case TokenNameStringInterpolated :
3336         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3337       case TokenNameHEREDOC :
3338         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3339
3340       case TokenNamePLUS_PLUS :
3341         return "++"; //$NON-NLS-1$
3342       case TokenNameMINUS_MINUS :
3343         return "--"; //$NON-NLS-1$
3344       case TokenNameEQUAL_EQUAL :
3345         return "=="; //$NON-NLS-1$
3346       case TokenNameEQUAL_EQUAL_EQUAL :
3347         return "==="; //$NON-NLS-1$
3348       case TokenNameEQUAL_GREATER :
3349         return "=>"; //$NON-NLS-1$
3350       case TokenNameLESS_EQUAL :
3351         return "<="; //$NON-NLS-1$
3352       case TokenNameGREATER_EQUAL :
3353         return ">="; //$NON-NLS-1$
3354       case TokenNameNOT_EQUAL :
3355         return "!="; //$NON-NLS-1$
3356       case TokenNameNOT_EQUAL_EQUAL :
3357         return "!=="; //$NON-NLS-1$
3358       case TokenNameLEFT_SHIFT :
3359         return "<<"; //$NON-NLS-1$
3360       case TokenNameRIGHT_SHIFT :
3361         return ">>"; //$NON-NLS-1$
3362       case TokenNamePLUS_EQUAL :
3363         return "+="; //$NON-NLS-1$
3364       case TokenNameMINUS_EQUAL :
3365         return "-="; //$NON-NLS-1$
3366       case TokenNameMULTIPLY_EQUAL :
3367         return "*="; //$NON-NLS-1$
3368       case TokenNameDIVIDE_EQUAL :
3369         return "/="; //$NON-NLS-1$
3370       case TokenNameAND_EQUAL :
3371         return "&="; //$NON-NLS-1$
3372       case TokenNameOR_EQUAL :
3373         return "|="; //$NON-NLS-1$
3374       case TokenNameXOR_EQUAL :
3375         return "^="; //$NON-NLS-1$
3376       case TokenNameREMAINDER_EQUAL :
3377         return "%="; //$NON-NLS-1$
3378       case TokenNameLEFT_SHIFT_EQUAL :
3379         return "<<="; //$NON-NLS-1$
3380       case TokenNameRIGHT_SHIFT_EQUAL :
3381         return ">>="; //$NON-NLS-1$
3382       case TokenNameOR_OR :
3383         return "||"; //$NON-NLS-1$
3384       case TokenNameAND_AND :
3385         return "&&"; //$NON-NLS-1$
3386       case TokenNamePLUS :
3387         return "+"; //$NON-NLS-1$
3388       case TokenNameMINUS :
3389         return "-"; //$NON-NLS-1$
3390       case TokenNameMINUS_GREATER :
3391         return "->";
3392       case TokenNameNOT :
3393         return "!"; //$NON-NLS-1$
3394       case TokenNameREMAINDER :
3395         return "%"; //$NON-NLS-1$
3396       case TokenNameXOR :
3397         return "^"; //$NON-NLS-1$
3398       case TokenNameAND :
3399         return "&"; //$NON-NLS-1$
3400       case TokenNameMULTIPLY :
3401         return "*"; //$NON-NLS-1$
3402       case TokenNameOR :
3403         return "|"; //$NON-NLS-1$
3404       case TokenNameTWIDDLE :
3405         return "~"; //$NON-NLS-1$
3406       case TokenNameTWIDDLE_EQUAL :
3407         return "~="; //$NON-NLS-1$
3408       case TokenNameDIVIDE :
3409         return "/"; //$NON-NLS-1$
3410       case TokenNameGREATER :
3411         return ">"; //$NON-NLS-1$
3412       case TokenNameLESS :
3413         return "<"; //$NON-NLS-1$
3414       case TokenNameLPAREN :
3415         return "("; //$NON-NLS-1$
3416       case TokenNameRPAREN :
3417         return ")"; //$NON-NLS-1$
3418       case TokenNameLBRACE :
3419         return "{"; //$NON-NLS-1$
3420       case TokenNameRBRACE :
3421         return "}"; //$NON-NLS-1$
3422       case TokenNameLBRACKET :
3423         return "["; //$NON-NLS-1$
3424       case TokenNameRBRACKET :
3425         return "]"; //$NON-NLS-1$
3426       case TokenNameSEMICOLON :
3427         return ";"; //$NON-NLS-1$
3428       case TokenNameQUESTION :
3429         return "?"; //$NON-NLS-1$
3430       case TokenNameCOLON :
3431         return ":"; //$NON-NLS-1$
3432       case TokenNameCOMMA :
3433         return ","; //$NON-NLS-1$
3434       case TokenNameDOT :
3435         return "."; //$NON-NLS-1$
3436       case TokenNameEQUAL :
3437         return "="; //$NON-NLS-1$
3438       case TokenNameAT :
3439         return "@";
3440       case TokenNameDOLLAR_LBRACE :
3441         return "${";
3442       case TokenNameEOF :
3443         return "EOF"; //$NON-NLS-1$
3444       case TokenNameWHITESPACE :
3445         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3446       case TokenNameCOMMENT_LINE :
3447         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3448       case TokenNameCOMMENT_BLOCK :
3449         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3450       case TokenNameCOMMENT_PHPDOC :
3451         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3452       case TokenNameHTML :
3453         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3454       default :
3455         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3456     }
3457   }
3458
3459   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3460     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3461   }
3462
3463   public Scanner(
3464     boolean tokenizeComments,
3465     boolean tokenizeWhiteSpace,
3466     boolean checkNonExternalizedStringLiterals,
3467     boolean assertMode) {
3468     this.eofPosition = Integer.MAX_VALUE;
3469     this.tokenizeComments = tokenizeComments;
3470     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3471     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3472     this.assertMode = assertMode;
3473   }
3474
3475   private void checkNonExternalizeString() throws InvalidInputException {
3476     if (currentLine == null)
3477       return;
3478     parseTags(currentLine);
3479   }
3480
3481   private void parseTags(NLSLine line) throws InvalidInputException {
3482     String s = new String(getCurrentTokenSource());
3483     int pos = s.indexOf(TAG_PREFIX);
3484     int lineLength = line.size();
3485     while (pos != -1) {
3486       int start = pos + TAG_PREFIX_LENGTH;
3487       int end = s.indexOf(TAG_POSTFIX, start);
3488       String index = s.substring(start, end);
3489       int i = 0;
3490       try {
3491         i = Integer.parseInt(index) - 1;
3492         // Tags are one based not zero based.
3493       } catch (NumberFormatException e) {
3494         i = -1; // we don't want to consider this as a valid NLS tag
3495       }
3496       if (line.exists(i)) {
3497         line.set(i, null);
3498       }
3499       pos = s.indexOf(TAG_PREFIX, start);
3500     }
3501
3502     this.nonNLSStrings = new StringLiteral[lineLength];
3503     int nonNLSCounter = 0;
3504     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3505       StringLiteral literal = (StringLiteral) iterator.next();
3506       if (literal != null) {
3507         this.nonNLSStrings[nonNLSCounter++] = literal;
3508       }
3509     }
3510     if (nonNLSCounter == 0) {
3511       this.nonNLSStrings = null;
3512       currentLine = null;
3513       return;
3514     }
3515     this.wasNonExternalizedStringLiteral = true;
3516     if (nonNLSCounter != lineLength) {
3517       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3518     }
3519     currentLine = null;
3520   }
3521 }