net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12 import java.util.ArrayList;
  13 import java.util.Iterator;
  14 import java.util.List;
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  20 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * Main APIs:
   *  - getNextToken(), which returns the type of the current token (this
   *    value is not memorized by the scanner);
   *  - getCurrentTokenSource(), which provides the token's "REAL" source
   *    (i.e. all unicode escapes have been transformed into the proper char).
   * startPosition gives the token's start position in the stream and
   * currentPosition-1 gives its end position in the stream.
   */
  // 1.4 feature ('assert' keyword support)
  private boolean assertMode;
  public boolean useAssertAsAnIndentifier = false;
  // flag indicating whether the processed source contains occurrences of the
  // keyword assert
  public boolean containsAssertKeyword = false;
  // NOTE(review): presumably enables recording of line ends into lineEnds -
  // the code that reads this flag is outside this view, confirm
  public boolean recordLineSeparator;
  // NOTE(review): presumably true while the scanner is inside PHP code (as
  // opposed to surrounding HTML) - confirm against getNextToken()
  public boolean phpMode = false;
  // the last character read from source by the getNextChar...() family
  public char currentCharacter;
  // index in source of the first character of the current token
  public int startPosition;
  // index in source of the next character to read; the current token ends at
  // currentPosition - 1
  public int currentPosition;
  // initialPosition is reported as the start of line 1; eofPosition as the
  // end of the last line
  public int initialPosition, eofPosition;
  // past eofPosition, EOF tokens are generated instead of real tokens from
  // the source
  public boolean tokenizeComments;
  public boolean tokenizeWhiteSpace;
  // source should be viewed as a window (i.e. a part)
  // of an entire, very large stream
  public char source[];
  // unicode support: buffer holding the token text with escapes already
  // decoded; the first real character is stored at index 1
  public char[] withoutUnicodeBuffer;
  public int withoutUnicodePtr;
  // withoutUnicodePtr == 0 ==> no unicode escape in the current token
  public boolean unicodeAsBackSlash = false;
  public boolean scanningFloatLiteral = false;
  // support for /** comments
  //public char[][] comments = new char[10][];
  public int[] commentStops = new int[10];
  public int[] commentStarts = new int[10];
  public int commentPtr = -1; // -1 means no comment has been recorded
  // diet parsing support - jump over some method bodies when requested
  public boolean diet = false;
  // support for the poor line-debuggers ....
  // remember the positions of the cr/lf; the buffer is preallocated and
  // linePtr starts at -1 (no line end recorded yet)
  public int[] lineEnds = new int[250];
  public int linePtr = -1;
  // NOTE(review): presumably "the previous character was a carriage return" -
  // confirm where line separators are recorded
  public boolean wasAcr = false;
  // Keys naming the different scanning failures.
  // NOTE(review): presumably used as InvalidInputException messages - the
  // throw sites are outside this view, confirm.
  public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  //----------------optimized identifier management------------------
  // Shared one-character arrays, one per lower-case ASCII letter.
  // NOTE(review): presumably handed out for single-letter identifiers so no
  // new array is allocated - the consumer is outside this view.
  static final char[] charArray_a = new char[]{'a'},
      charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
      charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
      charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
      charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
      charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
      charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
      charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
      charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
      charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
      charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
      charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
      charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
      charArray_z = new char[]{'z'};
  // Placeholder made of six NUL characters; every slot of charArray_length
  // initially refers to this array (see the instance initializer).
  static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
      '\u0000', '\u0000', '\u0000'};
  static final int TableSize = 30, InternalTableSize = 6;
  //30*6 = 180 entries
  // Number of first-level tables in charArray_length; getCurrentIdentifierSource()
  // special-cases token lengths 1..6.
  public static final int OptimizedLength = 6;
  // Per-instance table of char arrays, dimensioned
  // [OptimizedLength][TableSize][InternalTableSize].
  public/* static */
  final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
  // support for detecting non-externalized string literals
  // line-tracking state; -1 means no line has been seen yet
  int currentLineNr = -1;
  int previousLineNr = -1;
  NLSLine currentLine = null;
  // NOTE(review): presumably a list of NLSLine objects - element type is not
  // visible from the raw List declaration
  List lines = new ArrayList();
  // A NON-NLS tag is TAG_PREFIX, followed by other text, followed by
  // TAG_POSTFIX (as in the NON-NLS markers used in this file)
  public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
  public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
  public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
  public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
  public StringLiteral[] nonNLSStrings = null;
  // the check is switched on by default
  public boolean checkNonExternalizedStringLiterals = true;
  public boolean wasNonExternalizedStringLiteral = false;
 110   /* static */{
 111     for (int i = 0; i < 6; i++) {
 112       for (int j = 0; j < TableSize; j++) {
 113         for (int k = 0; k < InternalTableSize; k++) {
 114           charArray_length[i][j][k] = initCharArray;
 115         }
 116       }
 117     }
 118   }
  // Counters shared by all scanners.
  // NOTE(review): presumably the next insertion slot used by the
  // optimizedCurrentTokenSource2..6 helpers - their bodies are outside this
  // view, confirm.
  static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
      newEntry6 = 0;
  // Bracket kinds; BracketKinds is the number of kinds defined above it
  public static final int RoundBracket = 0;
  public static final int SquareBracket = 1;
  public static final int CurlyBracket = 2;
  public static final int BracketKinds = 3;
  // task tag support
  public char[][] foundTaskTags = null;
  public char[][] foundTaskMessages;
  public char[][] foundTaskPriorities = null;
  public int[][] foundTaskPositions;
  public int foundTaskCount = 0;
  public char[][] taskTags = null;
  public char[][] taskPriorities = null;
  // compile-time switches, both off
  public static final boolean DEBUG = false;
  public static final boolean TRACE = false;
  /**
   * Creates a scanner by delegating to {@link #Scanner(boolean, boolean)},
   * passing <code>false</code> for both <code>tokenizeComments</code> and
   * <code>tokenizeWhiteSpace</code>.
   */
  public Scanner() {
    this(false, false);
  }
  /**
   * Creates a scanner by delegating to the three-argument constructor with
   * <code>false</code> as the third argument.
   * NOTE(review): the three-argument constructor is outside this view, so the
   * meaning of that third flag should be confirmed there.
   *
   * @param tokenizeComments
   *          forwarded unchanged to the three-argument constructor
   * @param tokenizeWhiteSpace
   *          forwarded unchanged to the three-argument constructor
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
  }
 141   /**
 142    * Determines if the specified character is permissible as the first
 143    * character in a PHP identifier
 144    */
 145   public static boolean isPHPIdentifierStart(char ch) {
 146     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 147   }
 148   /**
 149    * Determines if the specified character may be part of a PHP identifier as
 150    * other than the first character
 151    */
 152   public static boolean isPHPIdentifierPart(char ch) {
 153     return Character.isLetterOrDigit(ch) || (ch == '_')
 154         || (0x7F <= ch && ch <= 0xFF);
 155   }
 156   public final boolean atEnd() {
 157     // This code is not relevant if source is
 158     // Only a part of the real stream input
 159     return source.length == currentPosition;
 160   }
 161   public char[] getCurrentIdentifierSource() {
 162     //return the token REAL source (aka unicodes are precomputed)
 163     char[] result;
 164     //    if (withoutUnicodePtr != 0)
 165     //      //0 is used as a fast test flag so the real first char is in position 1
 166     //      System.arraycopy(
 167     //        withoutUnicodeBuffer,
 168     //        1,
 169     //        result = new char[withoutUnicodePtr],
 170     //        0,
 171     //        withoutUnicodePtr);
 172     //    else {
 173     int length = currentPosition - startPosition;
 174     switch (length) { // see OptimizedLength
 175       case 1 :
 176         return optimizedCurrentTokenSource1();
 177       case 2 :
 178         return optimizedCurrentTokenSource2();
 179       case 3 :
 180         return optimizedCurrentTokenSource3();
 181       case 4 :
 182         return optimizedCurrentTokenSource4();
 183       case 5 :
 184         return optimizedCurrentTokenSource5();
 185       case 6 :
 186         return optimizedCurrentTokenSource6();
 187     }
 188     //no optimization
 189     System.arraycopy(source, startPosition, result = new char[length], 0,
 190         length);
 191     //   }
 192     return result;
 193   }
 194   public int getCurrentTokenEndPosition() {
 195     return this.currentPosition - 1;
 196   }
 197   public final char[] getCurrentTokenSource() {
 198     // Return the token REAL source (aka unicodes are precomputed)
 199     char[] result;
 200     //    if (withoutUnicodePtr != 0)
 201     //      // 0 is used as a fast test flag so the real first char is in position 1
 202     //      System.arraycopy(
 203     //        withoutUnicodeBuffer,
 204     //        1,
 205     //        result = new char[withoutUnicodePtr],
 206     //        0,
 207     //        withoutUnicodePtr);
 208     //    else {
 209     int length;
 210     System.arraycopy(source, startPosition,
 211         result = new char[length = currentPosition - startPosition], 0, length);
 212     //    }
 213     return result;
 214   }
 215   public final char[] getCurrentTokenSource(int startPos) {
 216     // Return the token REAL source (aka unicodes are precomputed)
 217     char[] result;
 218     //    if (withoutUnicodePtr != 0)
 219     //      // 0 is used as a fast test flag so the real first char is in position 1
 220     //      System.arraycopy(
 221     //        withoutUnicodeBuffer,
 222     //        1,
 223     //        result = new char[withoutUnicodePtr],
 224     //        0,
 225     //        withoutUnicodePtr);
 226     //    else {
 227     int length;
 228     System.arraycopy(source, startPos,
 229         result = new char[length = currentPosition - startPos], 0, length);
 230     //  }
 231     return result;
 232   }
 233   public final char[] getCurrentTokenSourceString() {
 234     //return the token REAL source (aka unicodes are precomputed).
 235     //REMOVE the two " that are at the beginning and the end.
 236     char[] result;
 237     if (withoutUnicodePtr != 0)
 238       //0 is used as a fast test flag so the real first char is in position 1
 239       System.arraycopy(withoutUnicodeBuffer, 2,
 240       //2 is 1 (real start) + 1 (to jump over the ")
 241           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 242     else {
 243       int length;
 244       System.arraycopy(source, startPosition + 1,
 245           result = new char[length = currentPosition - startPosition - 2], 0,
 246           length);
 247     }
 248     return result;
 249   }
 250   public int getCurrentTokenStartPosition() {
 251     return this.startPosition;
 252   }
 253   public final char[] getCurrentStringLiteralSource() {
 254     // Return the token REAL source (aka unicodes are precomputed)
 255     char[] result;
 256     int length;
 257     System.arraycopy(source, startPosition + 1,
 258         result = new char[length = currentPosition - startPosition - 2], 0,
 259         length);
 260     //    }
 261     return result;
 262   }
 263   /*
 264    * Search the source position corresponding to the end of a given line number
 265    *
 266    * Line numbers are 1-based, and relative to the scanner initialPosition.
 267    * Character positions are 0-based.
 268    *
 269    * In case the given line number is inconsistent, answers -1.
 270    */
 271   public final int getLineEnd(int lineNumber) {
 272     if (lineEnds == null)
 273       return -1;
 274     if (lineNumber >= lineEnds.length)
 275       return -1;
 276     if (lineNumber <= 0)
 277       return -1;
 278     if (lineNumber == lineEnds.length - 1)
 279       return eofPosition;
 280     return lineEnds[lineNumber - 1];
 281     // next line start one character behind the lineEnd of the previous line
 282   }
 283   /**
 284    * Search the source position corresponding to the beginning of a given line
 285    * number
 286    *
 287    * Line numbers are 1-based, and relative to the scanner initialPosition.
 288    * Character positions are 0-based.
 289    *
 290    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 291    *
 292    * In case the given line number is inconsistent, answers -1.
 293    */
 294   public final int getLineStart(int lineNumber) {
 295     if (lineEnds == null)
 296       return -1;
 297     if (lineNumber >= lineEnds.length)
 298       return -1;
 299     if (lineNumber <= 0)
 300       return -1;
 301     if (lineNumber == 1)
 302       return initialPosition;
 303     return lineEnds[lineNumber - 2] + 1;
 304     // next line start one character behind the lineEnd of the previous line
 305   }
 306   public final boolean getNextChar(char testedChar) {
 307     //BOOLEAN
 308     //handle the case of unicode.
 309     //when a unicode appears then we must use a buffer that holds char
 310     // internal values
 311     //At the end of this method currentCharacter holds the new visited char
 312     //and currentPosition points right next after it
 313     //Both previous lines are true if the currentCharacter is == to the
 314     // testedChar
 315     //On false, no side effect has occured.
 316     //ALL getNextChar.... ARE OPTIMIZED COPIES
 317     int temp = currentPosition;
 318     try {
 319       currentCharacter = source[currentPosition++];
 320       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 321       //        && (source[currentPosition] == 'u')) {
 322       //        //-------------unicode traitement ------------
 323       //        int c1, c2, c3, c4;
 324       //        int unicodeSize = 6;
 325       //        currentPosition++;
 326       //        while (source[currentPosition] == 'u') {
 327       //          currentPosition++;
 328       //          unicodeSize++;
 329       //        }
 330       //
 331       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 332       //          || c1 < 0)
 333       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 334       //            || c2 < 0)
 335       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 336       //            || c3 < 0)
 337       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 338       //            || c4 < 0)) {
 339       //          currentPosition = temp;
 340       //          return false;
 341       //        }
 342       //
 343       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 344       //        if (currentCharacter != testedChar) {
 345       //          currentPosition = temp;
 346       //          return false;
 347       //        }
 348       //        unicodeAsBackSlash = currentCharacter == '\\';
 349       //
 350       //        //need the unicode buffer
 351       //        if (withoutUnicodePtr == 0) {
 352       //          //buffer all the entries that have been left aside....
 353       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 354       //          System.arraycopy(
 355       //            source,
 356       //            startPosition,
 357       //            withoutUnicodeBuffer,
 358       //            1,
 359       //            withoutUnicodePtr);
 360       //        }
 361       //        //fill the buffer with the char
 362       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 363       //        return true;
 364       //
 365       //      } //-------------end unicode traitement--------------
 366       //      else {
 367       if (currentCharacter != testedChar) {
 368         currentPosition = temp;
 369         return false;
 370       }
 371       unicodeAsBackSlash = false;
 372       //        if (withoutUnicodePtr != 0)
 373       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 374       return true;
 375       //      }
 376     } catch (IndexOutOfBoundsException e) {
 377       unicodeAsBackSlash = false;
 378       currentPosition = temp;
 379       return false;
 380     }
 381   }
 382   public final int getNextChar(char testedChar1, char testedChar2) {
 383     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 384     //test can be done with (x==0) for the first and (x>0) for the second
 385     //handle the case of unicode.
 386     //when a unicode appears then we must use a buffer that holds char
 387     // internal values
 388     //At the end of this method currentCharacter holds the new visited char
 389     //and currentPosition points right next after it
 390     //Both previous lines are true if the currentCharacter is == to the
 391     // testedChar1/2
 392     //On false, no side effect has occured.
 393     //ALL getNextChar.... ARE OPTIMIZED COPIES
 394     int temp = currentPosition;
 395     try {
 396       int result;
 397       currentCharacter = source[currentPosition++];
 398       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 399       //        && (source[currentPosition] == 'u')) {
 400       //        //-------------unicode traitement ------------
 401       //        int c1, c2, c3, c4;
 402       //        int unicodeSize = 6;
 403       //        currentPosition++;
 404       //        while (source[currentPosition] == 'u') {
 405       //          currentPosition++;
 406       //          unicodeSize++;
 407       //        }
 408       //
 409       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 410       //          || c1 < 0)
 411       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 412       //            || c2 < 0)
 413       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 414       //            || c3 < 0)
 415       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 416       //            || c4 < 0)) {
 417       //          currentPosition = temp;
 418       //          return 2;
 419       //        }
 420       //
 421       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 422       //        if (currentCharacter == testedChar1)
 423       //          result = 0;
 424       //        else if (currentCharacter == testedChar2)
 425       //          result = 1;
 426       //        else {
 427       //          currentPosition = temp;
 428       //          return -1;
 429       //        }
 430       //
 431       //        //need the unicode buffer
 432       //        if (withoutUnicodePtr == 0) {
 433       //          //buffer all the entries that have been left aside....
 434       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 435       //          System.arraycopy(
 436       //            source,
 437       //            startPosition,
 438       //            withoutUnicodeBuffer,
 439       //            1,
 440       //            withoutUnicodePtr);
 441       //        }
 442       //        //fill the buffer with the char
 443       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 444       //        return result;
 445       //      } //-------------end unicode traitement--------------
 446       //      else {
 447       if (currentCharacter == testedChar1)
 448         result = 0;
 449       else if (currentCharacter == testedChar2)
 450         result = 1;
 451       else {
 452         currentPosition = temp;
 453         return -1;
 454       }
 455       //        if (withoutUnicodePtr != 0)
 456       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 457       return result;
 458       //     }
 459     } catch (IndexOutOfBoundsException e) {
 460       currentPosition = temp;
 461       return -1;
 462     }
 463   }
 464   public final boolean getNextCharAsDigit() {
 465     //BOOLEAN
 466     //handle the case of unicode.
 467     //when a unicode appears then we must use a buffer that holds char
 468     // internal values
 469     //At the end of this method currentCharacter holds the new visited char
 470     //and currentPosition points right next after it
 471     //Both previous lines are true if the currentCharacter is a digit
 472     //On false, no side effect has occured.
 473     //ALL getNextChar.... ARE OPTIMIZED COPIES
 474     int temp = currentPosition;
 475     try {
 476       currentCharacter = source[currentPosition++];
 477       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 478       //        && (source[currentPosition] == 'u')) {
 479       //        //-------------unicode traitement ------------
 480       //        int c1, c2, c3, c4;
 481       //        int unicodeSize = 6;
 482       //        currentPosition++;
 483       //        while (source[currentPosition] == 'u') {
 484       //          currentPosition++;
 485       //          unicodeSize++;
 486       //        }
 487       //
 488       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 489       //          || c1 < 0)
 490       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 491       //            || c2 < 0)
 492       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 493       //            || c3 < 0)
 494       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 495       //            || c4 < 0)) {
 496       //          currentPosition = temp;
 497       //          return false;
 498       //        }
 499       //
 500       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 501       //        if (!Character.isDigit(currentCharacter)) {
 502       //          currentPosition = temp;
 503       //          return false;
 504       //        }
 505       //
 506       //        //need the unicode buffer
 507       //        if (withoutUnicodePtr == 0) {
 508       //          //buffer all the entries that have been left aside....
 509       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 510       //          System.arraycopy(
 511       //            source,
 512       //            startPosition,
 513       //            withoutUnicodeBuffer,
 514       //            1,
 515       //            withoutUnicodePtr);
 516       //        }
 517       //        //fill the buffer with the char
 518       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 519       //        return true;
 520       //      } //-------------end unicode traitement--------------
 521       //      else {
 522       if (!Character.isDigit(currentCharacter)) {
 523         currentPosition = temp;
 524         return false;
 525       }
 526       //        if (withoutUnicodePtr != 0)
 527       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 528       return true;
 529       //      }
 530     } catch (IndexOutOfBoundsException e) {
 531       currentPosition = temp;
 532       return false;
 533     }
 534   }
 535   public final boolean getNextCharAsDigit(int radix) {
 536     //BOOLEAN
 537     //handle the case of unicode.
 538     //when a unicode appears then we must use a buffer that holds char
 539     // internal values
 540     //At the end of this method currentCharacter holds the new visited char
 541     //and currentPosition points right next after it
 542     //Both previous lines are true if the currentCharacter is a digit base on
 543     // radix
 544     //On false, no side effect has occured.
 545     //ALL getNextChar.... ARE OPTIMIZED COPIES
 546     int temp = currentPosition;
 547     try {
 548       currentCharacter = source[currentPosition++];
 549       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 550       //        && (source[currentPosition] == 'u')) {
 551       //        //-------------unicode traitement ------------
 552       //        int c1, c2, c3, c4;
 553       //        int unicodeSize = 6;
 554       //        currentPosition++;
 555       //        while (source[currentPosition] == 'u') {
 556       //          currentPosition++;
 557       //          unicodeSize++;
 558       //        }
 559       //
 560       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 561       //          || c1 < 0)
 562       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 563       //            || c2 < 0)
 564       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //            || c3 < 0)
 566       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 567       //            || c4 < 0)) {
 568       //          currentPosition = temp;
 569       //          return false;
 570       //        }
 571       //
 572       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 573       //        if (Character.digit(currentCharacter, radix) == -1) {
 574       //          currentPosition = temp;
 575       //          return false;
 576       //        }
 577       //
 578       //        //need the unicode buffer
 579       //        if (withoutUnicodePtr == 0) {
 580       //          //buffer all the entries that have been left aside....
 581       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 582       //          System.arraycopy(
 583       //            source,
 584       //            startPosition,
 585       //            withoutUnicodeBuffer,
 586       //            1,
 587       //            withoutUnicodePtr);
 588       //        }
 589       //        //fill the buffer with the char
 590       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 591       //        return true;
 592       //      } //-------------end unicode traitement--------------
 593       //      else {
 594       if (Character.digit(currentCharacter, radix) == -1) {
 595         currentPosition = temp;
 596         return false;
 597       }
 598       //        if (withoutUnicodePtr != 0)
 599       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 600       return true;
 601       //      }
 602     } catch (IndexOutOfBoundsException e) {
 603       currentPosition = temp;
 604       return false;
 605     }
 606   }
 607   public boolean getNextCharAsJavaIdentifierPart() {
 608     //BOOLEAN
 609     //handle the case of unicode.
 610     //when a unicode appears then we must use a buffer that holds char
 611     // internal values
 612     //At the end of this method currentCharacter holds the new visited char
 613     //and currentPosition points right next after it
 614     //Both previous lines are true if the currentCharacter is a
 615     // JavaIdentifierPart
 616     //On false, no side effect has occured.
 617     //ALL getNextChar.... ARE OPTIMIZED COPIES
 618     int temp = currentPosition;
 619     try {
 620       currentCharacter = source[currentPosition++];
 621       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 622       //        && (source[currentPosition] == 'u')) {
 623       //        //-------------unicode traitement ------------
 624       //        int c1, c2, c3, c4;
 625       //        int unicodeSize = 6;
 626       //        currentPosition++;
 627       //        while (source[currentPosition] == 'u') {
 628       //          currentPosition++;
 629       //          unicodeSize++;
 630       //        }
 631       //
 632       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 633       //          || c1 < 0)
 634       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 635       //            || c2 < 0)
 636       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 637       //            || c3 < 0)
 638       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 639       //            || c4 < 0)) {
 640       //          currentPosition = temp;
 641       //          return false;
 642       //        }
 643       //
 644       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 645       //        if (!isPHPIdentifierPart(currentCharacter)) {
 646       //          currentPosition = temp;
 647       //          return false;
 648       //        }
 649       //
 650       //        //need the unicode buffer
 651       //        if (withoutUnicodePtr == 0) {
 652       //          //buffer all the entries that have been left aside....
 653       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 654       //          System.arraycopy(
 655       //            source,
 656       //            startPosition,
 657       //            withoutUnicodeBuffer,
 658       //            1,
 659       //            withoutUnicodePtr);
 660       //        }
 661       //        //fill the buffer with the char
 662       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 663       //        return true;
 664       //      } //-------------end unicode traitement--------------
 665       //      else {
 666       if (!isPHPIdentifierPart(currentCharacter)) {
 667         currentPosition = temp;
 668         return false;
 669       }
 670       //        if (withoutUnicodePtr != 0)
 671       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 672       return true;
 673       //      }
 674     } catch (IndexOutOfBoundsException e) {
 675       currentPosition = temp;
 676       return false;
 677     }
 678   }
 679   public int getCastOrParen() {
 680     int tempPosition = currentPosition;
 681     char tempCharacter = currentCharacter;
 682     int tempToken = TokenNameLPAREN;
 683     boolean found = false;
 684     StringBuffer buf = new StringBuffer();
 685     try {
 686       do {
 687         currentCharacter = source[currentPosition++];
 688       } while (currentCharacter == ' ' || currentCharacter == '\t');
 689       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
 690           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 691         buf.append(currentCharacter);
 692         currentCharacter = source[currentPosition++];
 693       }
 694       if (buf.length() >= 3 && buf.length() <= 7) {
 695         char[] data = buf.toString().toCharArray();
 696         int index = 0;
 697         switch (data.length) {
 698           case 3 :
 699             // int
 700             if ((data[index] == 'i') && (data[++index] == 'n')
 701                 && (data[++index] == 't')) {
 702               found = true;
 703               tempToken = TokenNameintCAST;
 704             }
 705             break;
 706           case 4 :
 707             // bool real
 708             if ((data[index] == 'b') && (data[++index] == 'o')
 709                 && (data[++index] == 'o') && (data[++index] == 'l')) {
 710               found = true;
 711               tempToken = TokenNameboolCAST;
 712             } else {
 713               index = 0;
 714               if ((data[index] == 'r') && (data[++index] == 'e')
 715                   && (data[++index] == 'a') && (data[++index] == 'l')) {
 716                 found = true;
 717                 tempToken = TokenNamedoubleCAST;
 718               }
 719             }
 720             break;
 721           case 5 :
 722             // array unset float
 723             if ((data[index] == 'a') && (data[++index] == 'r')
 724                 && (data[++index] == 'r') && (data[++index] == 'a')
 725                 && (data[++index] == 'y')) {
 726               found = true;
 727               tempToken = TokenNamearrayCAST;
 728             } else {
 729               index = 0;
 730               if ((data[index] == 'u') && (data[++index] == 'n')
 731                   && (data[++index] == 's') && (data[++index] == 'e')
 732                   && (data[++index] == 't')) {
 733                 found = true;
 734                 tempToken = TokenNameunsetCAST;
 735               } else {
 736                 index = 0;
 737                 if ((data[index] == 'f') && (data[++index] == 'l')
 738                     && (data[++index] == 'o') && (data[++index] == 'a')
 739                     && (data[++index] == 't')) {
 740                   found = true;
 741                   tempToken = TokenNamedoubleCAST;
 742                 }
 743               }
 744             }
 745             break;
 746           case 6 :
 747             // object string double
 748             if ((data[index] == 'o') && (data[++index] == 'b')
 749                 && (data[++index] == 'j') && (data[++index] == 'e')
 750                 && (data[++index] == 'c') && (data[++index] == 't')) {
 751               found = true;
 752               tempToken = TokenNameobjectCAST;
 753             } else {
 754               index = 0;
 755               if ((data[index] == 's') && (data[++index] == 't')
 756                   && (data[++index] == 'r') && (data[++index] == 'i')
 757                   && (data[++index] == 'n') && (data[++index] == 'g')) {
 758                 found = true;
 759                 tempToken = TokenNamestringCAST;
 760               } else {
 761                 index = 0;
 762                 if ((data[index] == 'd') && (data[++index] == 'o')
 763                     && (data[++index] == 'u') && (data[++index] == 'b')
 764                     && (data[++index] == 'l') && (data[++index] == 'e')) {
 765                   found = true;
 766                   tempToken = TokenNamedoubleCAST;
 767                 }
 768               }
 769             }
 770             break;
 771           case 7 :
 772             // boolean integer
 773             if ((data[index] == 'b') && (data[++index] == 'o')
 774                 && (data[++index] == 'o') && (data[++index] == 'l')
 775                 && (data[++index] == 'e') && (data[++index] == 'a')
 776                 && (data[++index] == 'n')) {
 777               found = true;
 778               tempToken = TokenNameboolCAST;
 779             } else {
 780               index = 0;
 781               if ((data[index] == 'i') && (data[++index] == 'n')
 782                   && (data[++index] == 't') && (data[++index] == 'e')
 783                   && (data[++index] == 'g') && (data[++index] == 'e')
 784                   && (data[++index] == 'r')) {
 785                 found = true;
 786                 tempToken = TokenNameintCAST;
 787               }
 788             }
 789             break;
 790         }
 791         if (found) {
 792           while (currentCharacter == ' ' || currentCharacter == '\t') {
 793             currentCharacter = source[currentPosition++];
 794           }
 795           if (currentCharacter == ')') {
 796             return tempToken;
 797           }
 798         }
 799       }
 800     } catch (IndexOutOfBoundsException e) {
 801     }
 802     currentCharacter = tempCharacter;
 803     currentPosition = tempPosition;
 804     return TokenNameLPAREN;
 805   }
 806   public int getNextToken() throws InvalidInputException {
 807     if (!phpMode) {
 808       return getInlinedHTML(currentPosition);
 809     }
 810     if (phpMode) {
 811       this.wasAcr = false;
 812       if (diet) {
 813         jumpOverMethodBody();
 814         diet = false;
 815         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 816       }
 817       try {
 818         while (true) { //loop for jumping over comments
 819           withoutUnicodePtr = 0;
 820           //start with a new token (even comment written with unicode )
 821           // ---------Consume white space and handles startPosition---------
 822           int whiteStart = currentPosition;
 823           boolean isWhiteSpace;
 824           do {
 825             startPosition = currentPosition;
 826             currentCharacter = source[currentPosition++];
 827             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 828             //              && (source[currentPosition] == 'u')) {
 829             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 830             //            } else {
 831             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 832               checkNonExternalizeString();
 833               if (recordLineSeparator) {
 834                 pushLineSeparator();
 835               } else {
 836                 currentLine = null;
 837               }
 838             }
 839             isWhiteSpace = (currentCharacter == ' ')
 840                 || Character.isWhitespace(currentCharacter);
 841             //            }
 842           } while (isWhiteSpace);
 843           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 844             // reposition scanner in case we are interested by spaces as tokens
 845             currentPosition--;
 846             startPosition = whiteStart;
 847             return TokenNameWHITESPACE;
 848           }
 849           //little trick to get out in the middle of a source compuation
 850           if (currentPosition > eofPosition)
 851             return TokenNameEOF;
 852           // ---------Identify the next token-------------
 853           switch (currentCharacter) {
 854             case '(' :
 855               return getCastOrParen();
 856             case ')' :
 857               return TokenNameRPAREN;
 858             case '{' :
 859               return TokenNameLBRACE;
 860             case '}' :
 861               return TokenNameRBRACE;
 862             case '[' :
 863               return TokenNameLBRACKET;
 864             case ']' :
 865               return TokenNameRBRACKET;
 866             case ';' :
 867               return TokenNameSEMICOLON;
 868             case ',' :
 869               return TokenNameCOMMA;
 870             case '.' :
 871               if (getNextChar('='))
 872                 return TokenNameDOT_EQUAL;
 873               if (getNextCharAsDigit())
 874                 return scanNumber(true);
 875               return TokenNameDOT;
 876             case '+' :
 877               {
 878                 int test;
 879                 if ((test = getNextChar('+', '=')) == 0)
 880                   return TokenNamePLUS_PLUS;
 881                 if (test > 0)
 882                   return TokenNamePLUS_EQUAL;
 883                 return TokenNamePLUS;
 884               }
 885             case '-' :
 886               {
 887                 int test;
 888                 if ((test = getNextChar('-', '=')) == 0)
 889                   return TokenNameMINUS_MINUS;
 890                 if (test > 0)
 891                   return TokenNameMINUS_EQUAL;
 892                 if (getNextChar('>'))
 893                   return TokenNameMINUS_GREATER;
 894                 return TokenNameMINUS;
 895               }
 896             case '~' :
 897               if (getNextChar('='))
 898                 return TokenNameTWIDDLE_EQUAL;
 899               return TokenNameTWIDDLE;
 900             case '!' :
 901               if (getNextChar('=')) {
 902                 if (getNextChar('=')) {
 903                   return TokenNameNOT_EQUAL_EQUAL;
 904                 }
 905                 return TokenNameNOT_EQUAL;
 906               }
 907               return TokenNameNOT;
 908             case '*' :
 909               if (getNextChar('='))
 910                 return TokenNameMULTIPLY_EQUAL;
 911               return TokenNameMULTIPLY;
 912             case '%' :
 913               if (getNextChar('='))
 914                 return TokenNameREMAINDER_EQUAL;
 915               return TokenNameREMAINDER;
 916             case '<' :
 917               {
 918                 int oldPosition = currentPosition;
 919                 try {
 920                   currentCharacter = source[currentPosition++];
 921                 } catch (IndexOutOfBoundsException e) {
 922                   currentPosition = oldPosition;
 923                   return TokenNameLESS;
 924                 }
 925                 switch (currentCharacter) {
 926                   case '=' :
 927                     return TokenNameLESS_EQUAL;
 928                   case '>' :
 929                     return TokenNameNOT_EQUAL;
 930                   case '<' :
 931                     if (getNextChar('='))
 932                       return TokenNameLEFT_SHIFT_EQUAL;
 933                     if (getNextChar('<')) {
 934                       int heredocStart = currentPosition;
 935                       int heredocLength = 0;
 936                       currentCharacter = source[currentPosition++];
 937                       if (isPHPIdentifierStart(currentCharacter)) {
 938                         currentCharacter = source[currentPosition++];
 939                       } else {
 940                         return TokenNameERROR;
 941                       }
 942                       while (isPHPIdentifierPart(currentCharacter)) {
 943                         currentCharacter = source[currentPosition++];
 944                       }
 945                       heredocLength = currentPosition - heredocStart - 1;
 946                       // heredoc end-tag determination
 947                       boolean endTag = true;
 948                       char ch;
 949                       do {
 950                         ch = source[currentPosition++];
 951                         if (ch == '\r' || ch == '\n') {
 952                           if (recordLineSeparator) {
 953                             pushLineSeparator();
 954                           } else {
 955                             currentLine = null;
 956                           }
 957                           for (int i = 0; i < heredocLength; i++) {
 958                             if (source[currentPosition + i] != source[heredocStart
 959                                 + i]) {
 960                               endTag = false;
 961                               break;
 962                             }
 963                           }
 964                           if (endTag) {
 965                             currentPosition += heredocLength - 1;
 966                             currentCharacter = source[currentPosition++];
 967                             break; // do...while loop
 968                           } else {
 969                             endTag = true;
 970                           }
 971                         }
 972                       } while (true);
 973                       return TokenNameHEREDOC;
 974                     }
 975                     return TokenNameLEFT_SHIFT;
 976                 }
 977                 currentPosition = oldPosition;
 978                 return TokenNameLESS;
 979               }
 980             case '>' :
 981               {
 982                 int test;
 983                 if ((test = getNextChar('=', '>')) == 0)
 984                   return TokenNameGREATER_EQUAL;
 985                 if (test > 0) {
 986                   if ((test = getNextChar('=', '>')) == 0)
 987                     return TokenNameRIGHT_SHIFT_EQUAL;
 988                   return TokenNameRIGHT_SHIFT;
 989                 }
 990                 return TokenNameGREATER;
 991               }
 992             case '=' :
 993               if (getNextChar('=')) {
 994                 if (getNextChar('=')) {
 995                   return TokenNameEQUAL_EQUAL_EQUAL;
 996                 }
 997                 return TokenNameEQUAL_EQUAL;
 998               }
 999               if (getNextChar('>'))
1000                 return TokenNameEQUAL_GREATER;
1001               return TokenNameEQUAL;
1002             case '&' :
1003               {
1004                 int test;
1005                 if ((test = getNextChar('&', '=')) == 0)
1006                   return TokenNameAND_AND;
1007                 if (test > 0)
1008                   return TokenNameAND_EQUAL;
1009                 return TokenNameAND;
1010               }
1011             case '|' :
1012               {
1013                 int test;
1014                 if ((test = getNextChar('|', '=')) == 0)
1015                   return TokenNameOR_OR;
1016                 if (test > 0)
1017                   return TokenNameOR_EQUAL;
1018                 return TokenNameOR;
1019               }
1020             case '^' :
1021               if (getNextChar('='))
1022                 return TokenNameXOR_EQUAL;
1023               return TokenNameXOR;
1024             case '?' :
1025               if (getNextChar('>')) {
1026                 phpMode = false;
1027                 if (currentPosition==source.length) {
1028                   phpMode = true;
1029                   return TokenNameINLINE_HTML;
1030                 }
1031                 return getInlinedHTML(currentPosition - 2);
1032               }
1033               return TokenNameQUESTION;
1034             case ':' :
1035               if (getNextChar(':'))
1036                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1037               return TokenNameCOLON;
1038             case '@' :
1039               return TokenNameAT;
1040             //                                  case '\'' :
1041             //                                          {
1042             //                                                  int test;
1043             //                                                  if ((test = getNextChar('\n', '\r')) == 0) {
1044             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1045             //                                                  }
1046             //                                                  if (test > 0) {
1047             //                                                          // relocate if finding another quote fairly close: thus unicode
1048             // '/u000D' will be fully consumed
1049             //                                                          for (int lookAhead = 0;
1050             //                                                                  lookAhead < 3;
1051             //                                                                  lookAhead++) {
1052             //                                                                  if (currentPosition + lookAhead
1053             //                                                                          == source.length)
1054             //                                                                          break;
1055             //                                                                  if (source[currentPosition + lookAhead]
1056             //                                                                          == '\n')
1057             //                                                                          break;
1058             //                                                                  if (source[currentPosition + lookAhead]
1059             //                                                                          == '\'') {
1060             //                                                                          currentPosition += lookAhead + 1;
1061             //                                                                          break;
1062             //                                                                  }
1063             //                                                          }
1064             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1065             //                                                  }
1066             //                                          }
1067             //                                          if (getNextChar('\'')) {
1068             //                                                  // relocate if finding another quote fairly close: thus unicode
1069             // '/u000D' will be fully consumed
1070             //                                                  for (int lookAhead = 0;
1071             //                                                          lookAhead < 3;
1072             //                                                          lookAhead++) {
1073             //                                                          if (currentPosition + lookAhead
1074             //                                                                  == source.length)
1075             //                                                                  break;
1076             //                                                          if (source[currentPosition + lookAhead]
1077             //                                                                  == '\n')
1078             //                                                                  break;
1079             //                                                          if (source[currentPosition + lookAhead]
1080             //                                                                  == '\'') {
1081             //                                                                  currentPosition += lookAhead + 1;
1082             //                                                                  break;
1083             //                                                          }
1084             //                                                  }
1085             //                                                  throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1086             //                                          }
1087             //                                          if (getNextChar('\\'))
1088             //                                                  scanEscapeCharacter();
1089             //                                          else { // consume next character
1090             //                                                  unicodeAsBackSlash = false;
1091             //                                                  if (((currentCharacter = source[currentPosition++])
1092             //                                                          == '\\')
1093             //                                                          && (source[currentPosition] == 'u')) {
1094             //                                                          getNextUnicodeChar();
1095             //                                                  } else {
1096             //                                                          if (withoutUnicodePtr != 0) {
1097             //                                                                  withoutUnicodeBuffer[++withoutUnicodePtr] =
1098             //                                                                          currentCharacter;
1099             //                                                          }
1100             //                                                  }
1101             //                                          }
1102             //                                          // if (getNextChar('\''))
1103             //                                          // return TokenNameCharacterLiteral;
1104             //                                          // relocate if finding another quote fairly close: thus unicode
1105             // '/u000D' will be fully consumed
1106             //                                          for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1107             //                                                  if (currentPosition + lookAhead == source.length)
1108             //                                                          break;
1109             //                                                  if (source[currentPosition + lookAhead] == '\n')
1110             //                                                          break;
1111             //                                                  if (source[currentPosition + lookAhead] == '\'') {
1112             //                                                          currentPosition += lookAhead + 1;
1113             //                                                          break;
1114             //                                                  }
1115             //                                          }
1116             //                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1117             case '\'' :
1118               try {
1119                 // consume next character
1120                 unicodeAsBackSlash = false;
1121                 currentCharacter = source[currentPosition++];
1122                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1123                 //                  && (source[currentPosition] == 'u')) {
1124                 //                  getNextUnicodeChar();
1125                 //                } else {
1126                 //                  if (withoutUnicodePtr != 0) {
1127                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1128                 //                      currentCharacter;
1129                 //                  }
1130                 //                }
1131                 while (currentCharacter != '\'') {
1132                   /** ** in PHP \r and \n are valid in string literals *** */
1133                   //                  if ((currentCharacter == '\n')
1134                   //                    || (currentCharacter == '\r')) {
1135                   //                    // relocate if finding another quote fairly close: thus
1136                   // unicode '/u000D' will be fully consumed
1137                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1138                   //                      if (currentPosition + lookAhead == source.length)
1139                   //                        break;
1140                   //                      if (source[currentPosition + lookAhead] == '\n')
1141                   //                        break;
1142                   //                      if (source[currentPosition + lookAhead] == '\"') {
1143                   //                        currentPosition += lookAhead + 1;
1144                   //                        break;
1145                   //                      }
1146                   //                    }
1147                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1148                   //                  }
1149                   if (currentCharacter == '\\') {
1150                     int escapeSize = currentPosition;
1151                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1152                     //scanEscapeCharacter make a side effect on this value and
1153                     // we need the previous value few lines down this one
1154                     scanSingleQuotedEscapeCharacter();
1155                     escapeSize = currentPosition - escapeSize;
1156                     if (withoutUnicodePtr == 0) {
1157                       //buffer all the entries that have been left aside....
1158                       withoutUnicodePtr = currentPosition - escapeSize - 1
1159                           - startPosition;
1160                       System.arraycopy(source, startPosition,
1161                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1162                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1163                     } else { //overwrite the / in the buffer
1164                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1165                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1166                         // the stream where
1167                         // only one is correct
1168                         withoutUnicodePtr--;
1169                       }
1170                     }
1171                   }
1172                   // consume next character
1173                   unicodeAsBackSlash = false;
1174                   currentCharacter = source[currentPosition++];
1175                   //                  if (((currentCharacter = source[currentPosition++]) ==
1176                   // '\\')
1177                   //                    && (source[currentPosition] == 'u')) {
1178                   //                    getNextUnicodeChar();
1179                   //                  } else {
1180                   if (withoutUnicodePtr != 0) {
1181                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1182                   }
1183                   //                  }
1184                 }
1185               } catch (IndexOutOfBoundsException e) {
1186                 throw new InvalidInputException(UNTERMINATED_STRING);
1187               } catch (InvalidInputException e) {
1188                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1189                   // relocate if finding another quote fairly close: thus
1190                   // unicode '/u000D' will be fully consumed
1191                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1192                     if (currentPosition + lookAhead == source.length)
1193                       break;
1194                     if (source[currentPosition + lookAhead] == '\n')
1195                       break;
1196                     if (source[currentPosition + lookAhead] == '\'') {
1197                       currentPosition += lookAhead + 1;
1198                       break;
1199                     }
1200                   }
1201                 }
1202                 throw e; // rethrow
1203               }
1204               if (checkNonExternalizedStringLiterals) { // check for presence
1205                 // of NLS tags
1206                 // //$NON-NLS-?$ where
1207                 // ? is an int.
1208                 if (currentLine == null) {
1209                   currentLine = new NLSLine();
1210                   lines.add(currentLine);
1211                 }
1212                 currentLine.add(new StringLiteral(
1213                     getCurrentTokenSourceString(), startPosition,
1214                     currentPosition - 1));
1215               }
1216               return TokenNameStringConstant;
1217             case '"' :
1218               try {
1219                 // consume next character
1220                 unicodeAsBackSlash = false;
1221                 currentCharacter = source[currentPosition++];
1222                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1223                 //                  && (source[currentPosition] == 'u')) {
1224                 //                  getNextUnicodeChar();
1225                 //                } else {
1226                 //                  if (withoutUnicodePtr != 0) {
1227                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1228                 //                      currentCharacter;
1229                 //                  }
1230                 //                }
1231                 while (currentCharacter != '"') {
1232                   /** ** in PHP \r and \n are valid in string literals *** */
1233                   //                  if ((currentCharacter == '\n')
1234                   //                    || (currentCharacter == '\r')) {
1235                   //                    // relocate if finding another quote fairly close: thus
1236                   // unicode '/u000D' will be fully consumed
1237                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1238                   //                      if (currentPosition + lookAhead == source.length)
1239                   //                        break;
1240                   //                      if (source[currentPosition + lookAhead] == '\n')
1241                   //                        break;
1242                   //                      if (source[currentPosition + lookAhead] == '\"') {
1243                   //                        currentPosition += lookAhead + 1;
1244                   //                        break;
1245                   //                      }
1246                   //                    }
1247                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1248                   //                  }
1249                   if (currentCharacter == '\\') {
1250                     int escapeSize = currentPosition;
1251                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1252                     //scanEscapeCharacter make a side effect on this value and
1253                     // we need the previous value few lines down this one
1254                     scanDoubleQuotedEscapeCharacter();
1255                     escapeSize = currentPosition - escapeSize;
1256                     if (withoutUnicodePtr == 0) {
1257                       //buffer all the entries that have been left aside....
1258                       withoutUnicodePtr = currentPosition - escapeSize - 1
1259                           - startPosition;
1260                       System.arraycopy(source, startPosition,
1261                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1262                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1263                     } else { //overwrite the / in the buffer
1264                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1265                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1266                         // the stream where
1267                         // only one is correct
1268                         withoutUnicodePtr--;
1269                       }
1270                     }
1271                   }
1272                   // consume next character
1273                   unicodeAsBackSlash = false;
1274                   currentCharacter = source[currentPosition++];
1275                   //                  if (((currentCharacter = source[currentPosition++]) ==
1276                   // '\\')
1277                   //                    && (source[currentPosition] == 'u')) {
1278                   //                    getNextUnicodeChar();
1279                   //                  } else {
1280                   if (withoutUnicodePtr != 0) {
1281                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1282                   }
1283                   //                  }
1284                 }
1285               } catch (IndexOutOfBoundsException e) {
1286                 throw new InvalidInputException(UNTERMINATED_STRING);
1287               } catch (InvalidInputException e) {
1288                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1289                   // relocate if finding another quote fairly close: thus
1290                   // unicode '/u000D' will be fully consumed
1291                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1292                     if (currentPosition + lookAhead == source.length)
1293                       break;
1294                     if (source[currentPosition + lookAhead] == '\n')
1295                       break;
1296                     if (source[currentPosition + lookAhead] == '\"') {
1297                       currentPosition += lookAhead + 1;
1298                       break;
1299                     }
1300                   }
1301                 }
1302                 throw e; // rethrow
1303               }
1304               if (checkNonExternalizedStringLiterals) { // check for presence
1305                 // of NLS tags
1306                 // //$NON-NLS-?$ where
1307                 // ? is an int.
1308                 if (currentLine == null) {
1309                   currentLine = new NLSLine();
1310                   lines.add(currentLine);
1311                 }
1312                 currentLine.add(new StringLiteral(
1313                     getCurrentTokenSourceString(), startPosition,
1314                     currentPosition - 1));
1315               }
1316               return TokenNameStringLiteral;
1317             case '`' :
1318               try {
1319                 // consume next character
1320                 unicodeAsBackSlash = false;
1321                 currentCharacter = source[currentPosition++];
1322                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1323                 //                  && (source[currentPosition] == 'u')) {
1324                 //                  getNextUnicodeChar();
1325                 //                } else {
1326                 //                  if (withoutUnicodePtr != 0) {
1327                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1328                 //                      currentCharacter;
1329                 //                  }
1330                 //                }
1331                 while (currentCharacter != '`') {
1332                   /** ** in PHP \r and \n are valid in string literals *** */
1333                   //                if ((currentCharacter == '\n')
1334                   //                  || (currentCharacter == '\r')) {
1335                   //                  // relocate if finding another quote fairly close: thus
1336                   // unicode '/u000D' will be fully consumed
1337                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1338                   //                    if (currentPosition + lookAhead == source.length)
1339                   //                      break;
1340                   //                    if (source[currentPosition + lookAhead] == '\n')
1341                   //                      break;
1342                   //                    if (source[currentPosition + lookAhead] == '\"') {
1343                   //                      currentPosition += lookAhead + 1;
1344                   //                      break;
1345                   //                    }
1346                   //                  }
1347                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1348                   //                }
1349                   if (currentCharacter == '\\') {
1350                     int escapeSize = currentPosition;
1351                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1352                     //scanEscapeCharacter make a side effect on this value and
1353                     // we need the previous value few lines down this one
1354                     scanDoubleQuotedEscapeCharacter();
1355                     escapeSize = currentPosition - escapeSize;
1356                     if (withoutUnicodePtr == 0) {
1357                       //buffer all the entries that have been left aside....
1358                       withoutUnicodePtr = currentPosition - escapeSize - 1
1359                           - startPosition;
1360                       System.arraycopy(source, startPosition,
1361                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1362                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1363                     } else { //overwrite the / in the buffer
1364                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1365                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1366                         // the stream where
1367                         // only one is correct
1368                         withoutUnicodePtr--;
1369                       }
1370                     }
1371                   }
1372                   // consume next character
1373                   unicodeAsBackSlash = false;
1374                   currentCharacter = source[currentPosition++];
1375                   //                  if (((currentCharacter = source[currentPosition++]) ==
1376                   // '\\')
1377                   //                    && (source[currentPosition] == 'u')) {
1378                   //                    getNextUnicodeChar();
1379                   //                  } else {
1380                   if (withoutUnicodePtr != 0) {
1381                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1382                   }
1383                   //                  }
1384                 }
1385               } catch (IndexOutOfBoundsException e) {
1386                 throw new InvalidInputException(UNTERMINATED_STRING);
1387               } catch (InvalidInputException e) {
1388                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1389                   // relocate if finding another quote fairly close: thus
1390                   // unicode '/u000D' will be fully consumed
1391                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1392                     if (currentPosition + lookAhead == source.length)
1393                       break;
1394                     if (source[currentPosition + lookAhead] == '\n')
1395                       break;
1396                     if (source[currentPosition + lookAhead] == '`') {
1397                       currentPosition += lookAhead + 1;
1398                       break;
1399                     }
1400                   }
1401                 }
1402                 throw e; // rethrow
1403               }
1404               if (checkNonExternalizedStringLiterals) { // check for presence
1405                 // of NLS tags
1406                 // //$NON-NLS-?$ where
1407                 // ? is an int.
1408                 if (currentLine == null) {
1409                   currentLine = new NLSLine();
1410                   lines.add(currentLine);
1411                 }
1412                 currentLine.add(new StringLiteral(
1413                     getCurrentTokenSourceString(), startPosition,
1414                     currentPosition - 1));
1415               }
1416               return TokenNameStringInterpolated;
1417             case '#' :
1418             case '/' :
1419               {
1420                 char startChar = currentCharacter;
1421                 if (getNextChar('=')) {
1422                   return TokenNameDIVIDE_EQUAL;
1423                 }
1424                 int test;
1425                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1426                   //line comment
1427                   int endPositionForLineComment = 0;
1428                   try { //get the next char
1429                     currentCharacter = source[currentPosition++];
1430                     //                    if (((currentCharacter = source[currentPosition++])
1431                     //                      == '\\')
1432                     //                      && (source[currentPosition] == 'u')) {
1433                     //                      //-------------unicode traitement ------------
1434                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1435                     //                      currentPosition++;
1436                     //                      while (source[currentPosition] == 'u') {
1437                     //                        currentPosition++;
1438                     //                      }
1439                     //                      if ((c1 =
1440                     //                        Character.getNumericValue(source[currentPosition++]))
1441                     //                        > 15
1442                     //                        || c1 < 0
1443                     //                        || (c2 =
1444                     //                          Character.getNumericValue(source[currentPosition++]))
1445                     //                          > 15
1446                     //                        || c2 < 0
1447                     //                        || (c3 =
1448                     //                          Character.getNumericValue(source[currentPosition++]))
1449                     //                          > 15
1450                     //                        || c3 < 0
1451                     //                        || (c4 =
1452                     //                          Character.getNumericValue(source[currentPosition++]))
1453                     //                          > 15
1454                     //                        || c4 < 0) {
1455                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1456                     //                      } else {
1457                     //                        currentCharacter =
1458                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1459                     //                      }
1460                     //                    }
1461                     //handle the \\u case manually into comment
1462                     //                    if (currentCharacter == '\\') {
1463                     //                      if (source[currentPosition] == '\\')
1464                     //                        currentPosition++;
1465                     //                    } //jump over the \\
1466                     boolean isUnicode = false;
1467                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1468                       if (currentCharacter == '?') {
1469                         if (getNextChar('>')) {
1470                           startPosition = currentPosition - 2;
1471                           phpMode = false;
1472                           return TokenNameINLINE_HTML;
1473                         }
1474                       }
1475                       //get the next char
1476                       isUnicode = false;
1477                       currentCharacter = source[currentPosition++];
1478                       //                      if (((currentCharacter = source[currentPosition++])
1479                       //                        == '\\')
1480                       //                        && (source[currentPosition] == 'u')) {
1481                       //                        isUnicode = true;
1482                       //                        //-------------unicode traitement ------------
1483                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1484                       //                        currentPosition++;
1485                       //                        while (source[currentPosition] == 'u') {
1486                       //                          currentPosition++;
1487                       //                        }
1488                       //                        if ((c1 =
1489                       //                          Character.getNumericValue(source[currentPosition++]))
1490                       //                          > 15
1491                       //                          || c1 < 0
1492                       //                          || (c2 =
1493                       //                            Character.getNumericValue(
1494                       //                              source[currentPosition++]))
1495                       //                            > 15
1496                       //                          || c2 < 0
1497                       //                          || (c3 =
1498                       //                            Character.getNumericValue(
1499                       //                              source[currentPosition++]))
1500                       //                            > 15
1501                       //                          || c3 < 0
1502                       //                          || (c4 =
1503                       //                            Character.getNumericValue(
1504                       //                              source[currentPosition++]))
1505                       //                            > 15
1506                       //                          || c4 < 0) {
1507                       //                          throw new
1508                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1509                       //                        } else {
1510                       //                          currentCharacter =
1511                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1512                       //                        }
1513                       //                      }
1514                       //handle the \\u case manually into comment
1515                       //                      if (currentCharacter == '\\') {
1516                       //                        if (source[currentPosition] == '\\')
1517                       //                          currentPosition++;
1518                       //                      } //jump over the \\
1519                     }
1520                     if (isUnicode) {
1521                       endPositionForLineComment = currentPosition - 6;
1522                     } else {
1523                       endPositionForLineComment = currentPosition - 1;
1524                     }
1525                     recordComment(false);
1526                     if ((currentCharacter == '\r')
1527                         || (currentCharacter == '\n')) {
1528                       checkNonExternalizeString();
1529                       if (recordLineSeparator) {
1530                         if (isUnicode) {
1531                           pushUnicodeLineSeparator();
1532                         } else {
1533                           pushLineSeparator();
1534                         }
1535                       } else {
1536                         currentLine = null;
1537                       }
1538                     }
1539                     if (tokenizeComments) {
1540                       if (!isUnicode) {
1541                         currentPosition = endPositionForLineComment;
1542                         // reset one character behind
1543                       }
1544                       return TokenNameCOMMENT_LINE;
1545                     }
1546                   } catch (IndexOutOfBoundsException e) { //an eof will them
1547                     // be generated
1548                     if (tokenizeComments) {
1549                       currentPosition--;
1550                       // reset one character behind
1551                       return TokenNameCOMMENT_LINE;
1552                     }
1553                   }
1554                   break;
1555                 }
1556                 if (test > 0) {
1557                   //traditional and annotation comment
1558                   boolean isJavadoc = false, star = false;
1559                   // consume next character
1560                   unicodeAsBackSlash = false;
1561                   currentCharacter = source[currentPosition++];
1562                   //                  if (((currentCharacter = source[currentPosition++]) ==
1563                   // '\\')
1564                   //                    && (source[currentPosition] == 'u')) {
1565                   //                    getNextUnicodeChar();
1566                   //                  } else {
1567                   //                    if (withoutUnicodePtr != 0) {
1568                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1569                   //                        currentCharacter;
1570                   //                    }
1571                   //                  }
1572                   if (currentCharacter == '*') {
1573                     isJavadoc = true;
1574                     star = true;
1575                   }
1576                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1577                     checkNonExternalizeString();
1578                     if (recordLineSeparator) {
1579                       pushLineSeparator();
1580                     } else {
1581                       currentLine = null;
1582                     }
1583                   }
1584                   try { //get the next char
1585                     currentCharacter = source[currentPosition++];
1586                     //                    if (((currentCharacter = source[currentPosition++])
1587                     //                      == '\\')
1588                     //                      && (source[currentPosition] == 'u')) {
1589                     //                      //-------------unicode traitement ------------
1590                     //                      getNextUnicodeChar();
1591                     //                    }
1592                     //handle the \\u case manually into comment
1593                     //                    if (currentCharacter == '\\') {
1594                     //                      if (source[currentPosition] == '\\')
1595                     //                        currentPosition++;
1596                     //                      //jump over the \\
1597                     //                    }
1598                     // empty comment is not a javadoc /**/
1599                     if (currentCharacter == '/') {
1600                       isJavadoc = false;
1601                     }
1602                     //loop until end of comment */
1603                     while ((currentCharacter != '/') || (!star)) {
1604                       if ((currentCharacter == '\r')
1605                           || (currentCharacter == '\n')) {
1606                         checkNonExternalizeString();
1607                         if (recordLineSeparator) {
1608                           pushLineSeparator();
1609                         } else {
1610                           currentLine = null;
1611                         }
1612                       }
1613                       star = currentCharacter == '*';
1614                       //get next char
1615                       currentCharacter = source[currentPosition++];
1616                       //                      if (((currentCharacter = source[currentPosition++])
1617                       //                        == '\\')
1618                       //                        && (source[currentPosition] == 'u')) {
1619                       //                        //-------------unicode traitement ------------
1620                       //                        getNextUnicodeChar();
1621                       //                      }
1622                       //handle the \\u case manually into comment
1623                       //                      if (currentCharacter == '\\') {
1624                       //                        if (source[currentPosition] == '\\')
1625                       //                          currentPosition++;
1626                       //                      } //jump over the \\
1627                     }
1628                     recordComment(isJavadoc);
1629                     if (tokenizeComments) {
1630                       if (isJavadoc)
1631                         return TokenNameCOMMENT_PHPDOC;
1632                       return TokenNameCOMMENT_BLOCK;
1633                     }
1634                   } catch (IndexOutOfBoundsException e) {
1635                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1636                   }
1637                   break;
1638                 }
1639                 return TokenNameDIVIDE;
1640               }
1641             case '\u001a' :
1642               if (atEnd())
1643                 return TokenNameEOF;
1644               //the atEnd may not be <currentPosition == source.length> if
1645               // source is only some part of a real (external) stream
1646               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1647             default :
1648               if (currentCharacter == '$') {
1649                 int oldPosition = currentPosition;
1650                 try {
1651                   currentCharacter = source[currentPosition++];
1652                   if (isPHPIdentifierStart(currentCharacter)) {
1653                     return scanIdentifierOrKeyword(true);
1654                   } else {
1655                     currentPosition = oldPosition;
1656                     return TokenNameDOLLAR;
1657                   }
1658                 } catch (IndexOutOfBoundsException e) {
1659                   currentPosition = oldPosition;
1660                   return TokenNameDOLLAR;
1661                 }
1662               }
1663               if (isPHPIdentifierStart(currentCharacter))
1664                 return scanIdentifierOrKeyword(false);
1665               if (Character.isDigit(currentCharacter))
1666                 return scanNumber(false);
1667               return TokenNameERROR;
1668           }
1669         }
1670       } //-----------------end switch while try--------------------
1671       catch (IndexOutOfBoundsException e) {
1672       }
1673     }
1674     return TokenNameEOF;
1675   }
1676   /**
1677    * @return @throws
1678    *         InvalidInputException
1679    */
1680   private int getInlinedHTML(int start) throws InvalidInputException {
1681     //    int htmlPosition = start;
1682     if (currentPosition>source.length) {
1683         currentPosition = source.length;
1684         return TokenNameEOF;
1685     }
1686     startPosition = start;
1687     try {
1688       while (!phpMode) {
1689         currentCharacter = source[currentPosition++];
1690         if (currentCharacter == '<') {
1691           if (getNextChar('?')) {
1692             currentCharacter = source[currentPosition++];
1693             if ((currentCharacter == ' ')
1694                 || Character.isWhitespace(currentCharacter)) {
1695               // <?
1696               phpMode = true;
1697               return TokenNameINLINE_HTML;
1698             } else {
1699               boolean phpStart = (currentCharacter == 'P')
1700                   || (currentCharacter == 'p');
1701               if (phpStart) {
1702                 int test = getNextChar('H', 'h');
1703                 if (test >= 0) {
1704                   test = getNextChar('P', 'p');
1705                   if (test >= 0) {
1706                     // <?PHP <?php
1707                     phpMode = true;
1708                     return TokenNameINLINE_HTML;
1709                   }
1710                 }
1711               }
1712             }
1713           }
1714         }
1715         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1716           if (recordLineSeparator) {
1717             pushLineSeparator();
1718           } else {
1719             currentLine = null;
1720           }
1721         }
1722       } //-----------------while--------------------
1723       phpMode = true;
1724       return TokenNameINLINE_HTML;
1725     } //-----------------try--------------------
1726     catch (IndexOutOfBoundsException e) {
1727       startPosition = start;
1728       currentPosition--;
1729     }
1730     phpMode = true;
1731     return TokenNameINLINE_HTML;
1732   }
1733   //  public final void getNextUnicodeChar()
1734   //    throws IndexOutOfBoundsException, InvalidInputException {
1735   //    //VOID
1736   //    //handle the case of unicode.
1737   //    //when a unicode appears then we must use a buffer that holds char
1738   // internal values
1739   //    //At the end of this method currentCharacter holds the new visited char
1740   //    //and currentPosition points right next after it
1741   //
1742   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1743   //
1744   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1745   //    currentPosition++;
1746   //    while (source[currentPosition] == 'u') {
1747   //      currentPosition++;
1748   //      unicodeSize++;
1749   //    }
1750   //
1751   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1752   //      || c1 < 0
1753   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1754   //      || c2 < 0
1755   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1756   //      || c3 < 0
1757   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1758   //      || c4 < 0) {
1759   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1760   //    } else {
1761   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1762   //      //need the unicode buffer
1763   //      if (withoutUnicodePtr == 0) {
1764   //        //buffer all the entries that have been left aside....
1765   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1766   //        System.arraycopy(
1767   //          source,
1768   //          startPosition,
1769   //          withoutUnicodeBuffer,
1770   //          1,
1771   //          withoutUnicodePtr);
1772   //      }
1773   //      //fill the buffer with the char
1774   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1775   //    }
1776   //    unicodeAsBackSlash = currentCharacter == '\\';
1777   //  }
1778   /*
1779    * Tokenize a method body, assuming that curly brackets are properly
1780    * balanced.
1781    */
1782   public final void jumpOverMethodBody() {
1783     this.wasAcr = false;
1784     int found = 1;
1785     try {
1786       while (true) { //loop for jumping over comments
1787         // ---------Consume white space and handles startPosition---------
1788         boolean isWhiteSpace;
1789         do {
1790           startPosition = currentPosition;
1791           currentCharacter = source[currentPosition++];
1792           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1793           //            && (source[currentPosition] == 'u')) {
1794           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1795           //          } else {
1796           if (recordLineSeparator
1797               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1798             pushLineSeparator();
1799           isWhiteSpace = Character.isWhitespace(currentCharacter);
1800           //          }
1801         } while (isWhiteSpace);
1802         // -------consume token until } is found---------
1803         switch (currentCharacter) {
1804           case '{' :
1805             found++;
1806             break;
1807           case '}' :
1808             found--;
1809             if (found == 0)
1810               return;
1811             break;
1812           case '\'' :
1813             {
1814               boolean test;
1815               test = getNextChar('\\');
1816               if (test) {
1817                 try {
1818                   scanDoubleQuotedEscapeCharacter();
1819                 } catch (InvalidInputException ex) {
1820                 };
1821               } else {
1822                 //                try { // consume next character
1823                 unicodeAsBackSlash = false;
1824                 currentCharacter = source[currentPosition++];
1825                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1826                 //                    && (source[currentPosition] == 'u')) {
1827                 //                    getNextUnicodeChar();
1828                 //                  } else {
1829                 if (withoutUnicodePtr != 0) {
1830                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1831                 }
1832                 //                  }
1833                 //                } catch (InvalidInputException ex) {
1834                 //                };
1835               }
1836               getNextChar('\'');
1837               break;
1838             }
1839           case '"' :
1840             try {
1841               //              try { // consume next character
1842               unicodeAsBackSlash = false;
1843               currentCharacter = source[currentPosition++];
1844               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1845               //                  && (source[currentPosition] == 'u')) {
1846               //                  getNextUnicodeChar();
1847               //                } else {
1848               if (withoutUnicodePtr != 0) {
1849                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1850               }
1851               //                }
1852               //              } catch (InvalidInputException ex) {
1853               //              };
1854               while (currentCharacter != '"') {
1855                 if (currentCharacter == '\r') {
1856                   if (source[currentPosition] == '\n')
1857                     currentPosition++;
1858                   break;
1859                   // the string cannot go further that the line
1860                 }
1861                 if (currentCharacter == '\n') {
1862                   break;
1863                   // the string cannot go further that the line
1864                 }
1865                 if (currentCharacter == '\\') {
1866                   try {
1867                     scanDoubleQuotedEscapeCharacter();
1868                   } catch (InvalidInputException ex) {
1869                   };
1870                 }
1871                 //                try { // consume next character
1872                 unicodeAsBackSlash = false;
1873                 currentCharacter = source[currentPosition++];
1874                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1875                 //                    && (source[currentPosition] == 'u')) {
1876                 //                    getNextUnicodeChar();
1877                 //                  } else {
1878                 if (withoutUnicodePtr != 0) {
1879                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1880                 }
1881                 //                  }
1882                 //                } catch (InvalidInputException ex) {
1883                 //                };
1884               }
1885             } catch (IndexOutOfBoundsException e) {
1886               return;
1887             }
1888             break;
1889           case '/' :
1890             {
1891               int test;
1892               if ((test = getNextChar('/', '*')) == 0) {
1893                 //line comment
1894                 try {
1895                   //get the next char
1896                   currentCharacter = source[currentPosition++];
1897                   //                  if (((currentCharacter = source[currentPosition++]) ==
1898                   // '\\')
1899                   //                    && (source[currentPosition] == 'u')) {
1900                   //                    //-------------unicode traitement ------------
1901                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1902                   //                    currentPosition++;
1903                   //                    while (source[currentPosition] == 'u') {
1904                   //                      currentPosition++;
1905                   //                    }
1906                   //                    if ((c1 =
1907                   //                      Character.getNumericValue(source[currentPosition++]))
1908                   //                      > 15
1909                   //                      || c1 < 0
1910                   //                      || (c2 =
1911                   //                        Character.getNumericValue(source[currentPosition++]))
1912                   //                        > 15
1913                   //                      || c2 < 0
1914                   //                      || (c3 =
1915                   //                        Character.getNumericValue(source[currentPosition++]))
1916                   //                        > 15
1917                   //                      || c3 < 0
1918                   //                      || (c4 =
1919                   //                        Character.getNumericValue(source[currentPosition++]))
1920                   //                        > 15
1921                   //                      || c4 < 0) {
1922                   //                      //error don't care of the value
1923                   //                      currentCharacter = 'A';
1924                   //                    } //something different from \n and \r
1925                   //                    else {
1926                   //                      currentCharacter =
1927                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1928                   //                    }
1929                   //                  }
1930                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1931                     //get the next char
1932                     currentCharacter = source[currentPosition++];
1933                     //                    if (((currentCharacter = source[currentPosition++])
1934                     //                      == '\\')
1935                     //                      && (source[currentPosition] == 'u')) {
1936                     //                      //-------------unicode traitement ------------
1937                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1938                     //                      currentPosition++;
1939                     //                      while (source[currentPosition] == 'u') {
1940                     //                        currentPosition++;
1941                     //                      }
1942                     //                      if ((c1 =
1943                     //                        Character.getNumericValue(source[currentPosition++]))
1944                     //                        > 15
1945                     //                        || c1 < 0
1946                     //                        || (c2 =
1947                     //                          Character.getNumericValue(source[currentPosition++]))
1948                     //                          > 15
1949                     //                        || c2 < 0
1950                     //                        || (c3 =
1951                     //                          Character.getNumericValue(source[currentPosition++]))
1952                     //                          > 15
1953                     //                        || c3 < 0
1954                     //                        || (c4 =
1955                     //                          Character.getNumericValue(source[currentPosition++]))
1956                     //                          > 15
1957                     //                        || c4 < 0) {
1958                     //                        //error don't care of the value
1959                     //                        currentCharacter = 'A';
1960                     //                      } //something different from \n and \r
1961                     //                      else {
1962                     //                        currentCharacter =
1963                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1964                     //                      }
1965                     //                    }
1966                   }
1967                   if (recordLineSeparator
1968                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1969                     pushLineSeparator();
1970                 } catch (IndexOutOfBoundsException e) {
1971                 } //an eof will them be generated
1972                 break;
1973               }
1974               if (test > 0) {
1975                 //traditional and annotation comment
1976                 boolean star = false;
1977                 //                try { // consume next character
1978                 unicodeAsBackSlash = false;
1979                 currentCharacter = source[currentPosition++];
1980                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1981                 //                    && (source[currentPosition] == 'u')) {
1982                 //                    getNextUnicodeChar();
1983                 //                  } else {
1984                 if (withoutUnicodePtr != 0) {
1985                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1986                 }
1987                 //                  };
1988                 //                } catch (InvalidInputException ex) {
1989                 //                };
1990                 if (currentCharacter == '*') {
1991                   star = true;
1992                 }
1993                 if (recordLineSeparator
1994                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995                   pushLineSeparator();
1996                 try { //get the next char
1997                   currentCharacter = source[currentPosition++];
1998                   //                  if (((currentCharacter = source[currentPosition++]) ==
1999                   // '\\')
2000                   //                    && (source[currentPosition] == 'u')) {
2001                   //                    //-------------unicode traitement ------------
2002                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2003                   //                    currentPosition++;
2004                   //                    while (source[currentPosition] == 'u') {
2005                   //                      currentPosition++;
2006                   //                    }
2007                   //                    if ((c1 =
2008                   //                      Character.getNumericValue(source[currentPosition++]))
2009                   //                      > 15
2010                   //                      || c1 < 0
2011                   //                      || (c2 =
2012                   //                        Character.getNumericValue(source[currentPosition++]))
2013                   //                        > 15
2014                   //                      || c2 < 0
2015                   //                      || (c3 =
2016                   //                        Character.getNumericValue(source[currentPosition++]))
2017                   //                        > 15
2018                   //                      || c3 < 0
2019                   //                      || (c4 =
2020                   //                        Character.getNumericValue(source[currentPosition++]))
2021                   //                        > 15
2022                   //                      || c4 < 0) {
2023                   //                      //error don't care of the value
2024                   //                      currentCharacter = 'A';
2025                   //                    } //something different from * and /
2026                   //                    else {
2027                   //                      currentCharacter =
2028                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2029                   //                    }
2030                   //                  }
2031                   //loop until end of comment */
2032                   while ((currentCharacter != '/') || (!star)) {
2033                     if (recordLineSeparator
2034                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2035                       pushLineSeparator();
2036                     star = currentCharacter == '*';
2037                     //get next char
2038                     currentCharacter = source[currentPosition++];
2039                     //                    if (((currentCharacter = source[currentPosition++])
2040                     //                      == '\\')
2041                     //                      && (source[currentPosition] == 'u')) {
2042                     //                      //-------------unicode traitement ------------
2043                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2044                     //                      currentPosition++;
2045                     //                      while (source[currentPosition] == 'u') {
2046                     //                        currentPosition++;
2047                     //                      }
2048                     //                      if ((c1 =
2049                     //                        Character.getNumericValue(source[currentPosition++]))
2050                     //                        > 15
2051                     //                        || c1 < 0
2052                     //                        || (c2 =
2053                     //                          Character.getNumericValue(source[currentPosition++]))
2054                     //                          > 15
2055                     //                        || c2 < 0
2056                     //                        || (c3 =
2057                     //                          Character.getNumericValue(source[currentPosition++]))
2058                     //                          > 15
2059                     //                        || c3 < 0
2060                     //                        || (c4 =
2061                     //                          Character.getNumericValue(source[currentPosition++]))
2062                     //                          > 15
2063                     //                        || c4 < 0) {
2064                     //                        //error don't care of the value
2065                     //                        currentCharacter = 'A';
2066                     //                      } //something different from * and /
2067                     //                      else {
2068                     //                        currentCharacter =
2069                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2070                     //                      }
2071                     //                    }
2072                   }
2073                 } catch (IndexOutOfBoundsException e) {
2074                   return;
2075                 }
2076                 break;
2077               }
2078               break;
2079             }
2080           default :
2081             if (isPHPIdentifierStart(currentCharacter)
2082                 || currentCharacter == '$') {
2083               try {
2084                 scanIdentifierOrKeyword((currentCharacter == '$'));
2085               } catch (InvalidInputException ex) {
2086               };
2087               break;
2088             }
2089             if (Character.isDigit(currentCharacter)) {
2090               try {
2091                 scanNumber(false);
2092               } catch (InvalidInputException ex) {
2093               };
2094               break;
2095             }
2096         }
2097       }
2098       //-----------------end switch while try--------------------
2099     } catch (IndexOutOfBoundsException e) {
2100     } catch (InvalidInputException e) {
2101     }
2102     return;
2103   }
2104   //  public final boolean jumpOverUnicodeWhiteSpace()
2105   //    throws InvalidInputException {
2106   //    //BOOLEAN
2107   //    //handle the case of unicode. Jump over the next whiteSpace
2108   //    //making startPosition pointing on the next available char
2109   //    //On false, the currentCharacter is filled up with a potential
2110   //    //correct char
2111   //
2112   //    try {
2113   //      this.wasAcr = false;
2114   //      int c1, c2, c3, c4;
2115   //      int unicodeSize = 6;
2116   //      currentPosition++;
2117   //      while (source[currentPosition] == 'u') {
2118   //        currentPosition++;
2119   //        unicodeSize++;
2120   //      }
2121   //
2122   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2123   //        || c1 < 0)
2124   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2125   //          || c2 < 0)
2126   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2127   //          || c3 < 0)
2128   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2129   //          || c4 < 0)) {
2130   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2131   //      }
2132   //
2133   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134   //      if (recordLineSeparator
2135   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2136   //        pushLineSeparator();
2137   //      if (Character.isWhitespace(currentCharacter))
2138   //        return true;
2139   //
2140   //      //buffer the new char which is not a white space
2141   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2142   //      //withoutUnicodePtr == 1 is true here
2143   //      return false;
2144   //    } catch (IndexOutOfBoundsException e) {
2145   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2146   //    }
2147   //  }
2148   public final int[] getLineEnds() {
2149     //return a bounded copy of this.lineEnds
2150     int[] copy;
2151     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2152     return copy;
2153   }
2154   public char[] getSource() {
2155     return this.source;
2156   }
2157   final char[] optimizedCurrentTokenSource1() {
2158     //return always the same char[] build only once
2159     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2160     char charOne = source[startPosition];
2161     switch (charOne) {
2162       case 'a' :
2163         return charArray_a;
2164       case 'b' :
2165         return charArray_b;
2166       case 'c' :
2167         return charArray_c;
2168       case 'd' :
2169         return charArray_d;
2170       case 'e' :
2171         return charArray_e;
2172       case 'f' :
2173         return charArray_f;
2174       case 'g' :
2175         return charArray_g;
2176       case 'h' :
2177         return charArray_h;
2178       case 'i' :
2179         return charArray_i;
2180       case 'j' :
2181         return charArray_j;
2182       case 'k' :
2183         return charArray_k;
2184       case 'l' :
2185         return charArray_l;
2186       case 'm' :
2187         return charArray_m;
2188       case 'n' :
2189         return charArray_n;
2190       case 'o' :
2191         return charArray_o;
2192       case 'p' :
2193         return charArray_p;
2194       case 'q' :
2195         return charArray_q;
2196       case 'r' :
2197         return charArray_r;
2198       case 's' :
2199         return charArray_s;
2200       case 't' :
2201         return charArray_t;
2202       case 'u' :
2203         return charArray_u;
2204       case 'v' :
2205         return charArray_v;
2206       case 'w' :
2207         return charArray_w;
2208       case 'x' :
2209         return charArray_x;
2210       case 'y' :
2211         return charArray_y;
2212       case 'z' :
2213         return charArray_z;
2214       default :
2215         return new char[]{charOne};
2216     }
2217   }
2218   final char[] optimizedCurrentTokenSource2() {
2219     //try to return the same char[] build only once
2220     char c0, c1;
2221     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2222         % TableSize;
2223     char[][] table = charArray_length[0][hash];
2224     int i = newEntry2;
2225     while (++i < InternalTableSize) {
2226       char[] charArray = table[i];
2227       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2228         return charArray;
2229     }
2230     //---------other side---------
2231     i = -1;
2232     int max = newEntry2;
2233     while (++i <= max) {
2234       char[] charArray = table[i];
2235       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2236         return charArray;
2237     }
2238     //--------add the entry-------
2239     if (++max >= InternalTableSize)
2240       max = 0;
2241     char[] r;
2242     table[max] = (r = new char[]{c0, c1});
2243     newEntry2 = max;
2244     return r;
2245   }
2246   final char[] optimizedCurrentTokenSource3() {
2247     //try to return the same char[] build only once
2248     char c0, c1, c2;
2249     int hash = (((c0 = source[startPosition]) << 12)
2250         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2251         % TableSize;
2252     char[][] table = charArray_length[1][hash];
2253     int i = newEntry3;
2254     while (++i < InternalTableSize) {
2255       char[] charArray = table[i];
2256       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2257         return charArray;
2258     }
2259     //---------other side---------
2260     i = -1;
2261     int max = newEntry3;
2262     while (++i <= max) {
2263       char[] charArray = table[i];
2264       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2265         return charArray;
2266     }
2267     //--------add the entry-------
2268     if (++max >= InternalTableSize)
2269       max = 0;
2270     char[] r;
2271     table[max] = (r = new char[]{c0, c1, c2});
2272     newEntry3 = max;
2273     return r;
2274   }
2275   final char[] optimizedCurrentTokenSource4() {
2276     //try to return the same char[] build only once
2277     char c0, c1, c2, c3;
2278     long hash = ((((long) (c0 = source[startPosition])) << 18)
2279         + ((c1 = source[startPosition + 1]) << 12)
2280         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2281         % TableSize;
2282     char[][] table = charArray_length[2][(int) hash];
2283     int i = newEntry4;
2284     while (++i < InternalTableSize) {
2285       char[] charArray = table[i];
2286       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2287           && (c3 == charArray[3]))
2288         return charArray;
2289     }
2290     //---------other side---------
2291     i = -1;
2292     int max = newEntry4;
2293     while (++i <= max) {
2294       char[] charArray = table[i];
2295       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2296           && (c3 == charArray[3]))
2297         return charArray;
2298     }
2299     //--------add the entry-------
2300     if (++max >= InternalTableSize)
2301       max = 0;
2302     char[] r;
2303     table[max] = (r = new char[]{c0, c1, c2, c3});
2304     newEntry4 = max;
2305     return r;
2306   }
2307   final char[] optimizedCurrentTokenSource5() {
2308     //try to return the same char[] build only once
2309     char c0, c1, c2, c3, c4;
2310     long hash = ((((long) (c0 = source[startPosition])) << 24)
2311         + (((long) (c1 = source[startPosition + 1])) << 18)
2312         + ((c2 = source[startPosition + 2]) << 12)
2313         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2314         % TableSize;
2315     char[][] table = charArray_length[3][(int) hash];
2316     int i = newEntry5;
2317     while (++i < InternalTableSize) {
2318       char[] charArray = table[i];
2319       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2320           && (c3 == charArray[3]) && (c4 == charArray[4]))
2321         return charArray;
2322     }
2323     //---------other side---------
2324     i = -1;
2325     int max = newEntry5;
2326     while (++i <= max) {
2327       char[] charArray = table[i];
2328       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2329           && (c3 == charArray[3]) && (c4 == charArray[4]))
2330         return charArray;
2331     }
2332     //--------add the entry-------
2333     if (++max >= InternalTableSize)
2334       max = 0;
2335     char[] r;
2336     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2337     newEntry5 = max;
2338     return r;
2339   }
2340   final char[] optimizedCurrentTokenSource6() {
2341     //try to return the same char[] build only once
2342     char c0, c1, c2, c3, c4, c5;
2343     long hash = ((((long) (c0 = source[startPosition])) << 32)
2344         + (((long) (c1 = source[startPosition + 1])) << 24)
2345         + (((long) (c2 = source[startPosition + 2])) << 18)
2346         + ((c3 = source[startPosition + 3]) << 12)
2347         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2348         % TableSize;
2349     char[][] table = charArray_length[4][(int) hash];
2350     int i = newEntry6;
2351     while (++i < InternalTableSize) {
2352       char[] charArray = table[i];
2353       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2354           && (c3 == charArray[3]) && (c4 == charArray[4])
2355           && (c5 == charArray[5]))
2356         return charArray;
2357     }
2358     //---------other side---------
2359     i = -1;
2360     int max = newEntry6;
2361     while (++i <= max) {
2362       char[] charArray = table[i];
2363       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2364           && (c3 == charArray[3]) && (c4 == charArray[4])
2365           && (c5 == charArray[5]))
2366         return charArray;
2367     }
2368     //--------add the entry-------
2369     if (++max >= InternalTableSize)
2370       max = 0;
2371     char[] r;
2372     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2373     newEntry6 = max;
2374     return r;
2375   }
2376   public final void pushLineSeparator() throws InvalidInputException {
2377     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2378     final int INCREMENT = 250;
2379     if (this.checkNonExternalizedStringLiterals) {
2380       // reinitialize the current line for non externalize strings purpose
2381       currentLine = null;
2382     }
2383     //currentCharacter is at position currentPosition-1
2384     // cr 000D
2385     if (currentCharacter == '\r') {
2386       int separatorPos = currentPosition - 1;
2387       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2388         return;
2389       //System.out.println("CR-" + separatorPos);
2390       try {
2391         lineEnds[++linePtr] = separatorPos;
2392       } catch (IndexOutOfBoundsException e) {
2393         //linePtr value is correct
2394         int oldLength = lineEnds.length;
2395         int[] old = lineEnds;
2396         lineEnds = new int[oldLength + INCREMENT];
2397         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2398         lineEnds[linePtr] = separatorPos;
2399       }
2400       // look-ahead for merged cr+lf
2401       try {
2402         if (source[currentPosition] == '\n') {
2403           //System.out.println("look-ahead LF-" + currentPosition);
2404           lineEnds[linePtr] = currentPosition;
2405           currentPosition++;
2406           wasAcr = false;
2407         } else {
2408           wasAcr = true;
2409         }
2410       } catch (IndexOutOfBoundsException e) {
2411         wasAcr = true;
2412       }
2413     } else {
2414       // lf 000A
2415       if (currentCharacter == '\n') {
2416         //must merge eventual cr followed by lf
2417         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2418           //System.out.println("merge LF-" + (currentPosition - 1));
2419           lineEnds[linePtr] = currentPosition - 1;
2420         } else {
2421           int separatorPos = currentPosition - 1;
2422           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2423             return;
2424           // System.out.println("LF-" + separatorPos);
2425           try {
2426             lineEnds[++linePtr] = separatorPos;
2427           } catch (IndexOutOfBoundsException e) {
2428             //linePtr value is correct
2429             int oldLength = lineEnds.length;
2430             int[] old = lineEnds;
2431             lineEnds = new int[oldLength + INCREMENT];
2432             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2433             lineEnds[linePtr] = separatorPos;
2434           }
2435         }
2436         wasAcr = false;
2437       }
2438     }
2439   }
2440   public final void pushUnicodeLineSeparator() {
2441     // isUnicode means that the \r or \n has been read as a unicode character
2442     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2443     final int INCREMENT = 250;
2444     //currentCharacter is at position currentPosition-1
2445     if (this.checkNonExternalizedStringLiterals) {
2446       // reinitialize the current line for non externalize strings purpose
2447       currentLine = null;
2448     }
2449     // cr 000D
2450     if (currentCharacter == '\r') {
2451       int separatorPos = currentPosition - 6;
2452       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2453         return;
2454       //System.out.println("CR-" + separatorPos);
2455       try {
2456         lineEnds[++linePtr] = separatorPos;
2457       } catch (IndexOutOfBoundsException e) {
2458         //linePtr value is correct
2459         int oldLength = lineEnds.length;
2460         int[] old = lineEnds;
2461         lineEnds = new int[oldLength + INCREMENT];
2462         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2463         lineEnds[linePtr] = separatorPos;
2464       }
2465       // look-ahead for merged cr+lf
2466       if (source[currentPosition] == '\n') {
2467         //System.out.println("look-ahead LF-" + currentPosition);
2468         lineEnds[linePtr] = currentPosition;
2469         currentPosition++;
2470         wasAcr = false;
2471       } else {
2472         wasAcr = true;
2473       }
2474     } else {
2475       // lf 000A
2476       if (currentCharacter == '\n') {
2477         //must merge eventual cr followed by lf
2478         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2479           //System.out.println("merge LF-" + (currentPosition - 1));
2480           lineEnds[linePtr] = currentPosition - 6;
2481         } else {
2482           int separatorPos = currentPosition - 6;
2483           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2484             return;
2485           // System.out.println("LF-" + separatorPos);
2486           try {
2487             lineEnds[++linePtr] = separatorPos;
2488           } catch (IndexOutOfBoundsException e) {
2489             //linePtr value is correct
2490             int oldLength = lineEnds.length;
2491             int[] old = lineEnds;
2492             lineEnds = new int[oldLength + INCREMENT];
2493             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2494             lineEnds[linePtr] = separatorPos;
2495           }
2496         }
2497         wasAcr = false;
2498       }
2499     }
2500   }
2501   public final void recordComment(boolean isJavadoc) {
2502     // a new annotation comment is recorded
2503     try {
2504       commentStops[++commentPtr] = isJavadoc
2505           ? currentPosition
2506           : -currentPosition;
2507     } catch (IndexOutOfBoundsException e) {
2508       int oldStackLength = commentStops.length;
2509       int[] oldStack = commentStops;
2510       commentStops = new int[oldStackLength + 30];
2511       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2512       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2513       //grows the positions buffers too
2514       int[] old = commentStarts;
2515       commentStarts = new int[oldStackLength + 30];
2516       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2517     }
2518     //the buffer is of a correct size here
2519     commentStarts[commentPtr] = startPosition;
2520   }
2521   public void resetTo(int begin, int end) {
2522     //reset the scanner to a given position where it may rescan again
2523     diet = false;
2524     initialPosition = startPosition = currentPosition = begin;
2525     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2526     commentPtr = -1; // reset comment stack
2527   }
2528   public final void scanSingleQuotedEscapeCharacter()
2529       throws InvalidInputException {
2530     // the string with "\\u" is a legal string of two chars \ and u
2531     //thus we use a direct access to the source (for regular cases).
2532     //    if (unicodeAsBackSlash) {
2533     //      // consume next character
2534     //      unicodeAsBackSlash = false;
2535     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2536     //        && (source[currentPosition] == 'u')) {
2537     //        getNextUnicodeChar();
2538     //      } else {
2539     //        if (withoutUnicodePtr != 0) {
2540     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2541     //        }
2542     //      }
2543     //    } else
2544     currentCharacter = source[currentPosition++];
2545     switch (currentCharacter) {
2546       case '\'' :
2547         currentCharacter = '\'';
2548         break;
2549       case '\\' :
2550         currentCharacter = '\\';
2551         break;
2552       default :
2553         currentCharacter = '\\';
2554         currentPosition--;
2555     }
2556   }
2557   public final void scanDoubleQuotedEscapeCharacter()
2558       throws InvalidInputException {
2559     // the string with "\\u" is a legal string of two chars \ and u
2560     //thus we use a direct access to the source (for regular cases).
2561     //    if (unicodeAsBackSlash) {
2562     //      // consume next character
2563     //      unicodeAsBackSlash = false;
2564     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2565     //        && (source[currentPosition] == 'u')) {
2566     //        getNextUnicodeChar();
2567     //      } else {
2568     //        if (withoutUnicodePtr != 0) {
2569     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2570     //        }
2571     //      }
2572     //    } else
2573     currentCharacter = source[currentPosition++];
2574     switch (currentCharacter) {
2575       //      case 'b' :
2576       //        currentCharacter = '\b';
2577       //        break;
2578       case 't' :
2579         currentCharacter = '\t';
2580         break;
2581       case 'n' :
2582         currentCharacter = '\n';
2583         break;
2584       //      case 'f' :
2585       //        currentCharacter = '\f';
2586       //        break;
2587       case 'r' :
2588         currentCharacter = '\r';
2589         break;
2590       case '\"' :
2591         currentCharacter = '\"';
2592         break;
2593       case '\'' :
2594         currentCharacter = '\'';
2595         break;
2596       case '\\' :
2597         currentCharacter = '\\';
2598         break;
2599       case '$' :
2600         currentCharacter = '$';
2601         break;
2602       default :
2603         // -----------octal escape--------------
2604         // OctalDigit
2605         // OctalDigit OctalDigit
2606         // ZeroToThree OctalDigit OctalDigit
2607         int number = Character.getNumericValue(currentCharacter);
2608         if (number >= 0 && number <= 7) {
2609           boolean zeroToThreeNot = number > 3;
2610           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2611             int digit = Character.getNumericValue(currentCharacter);
2612             if (digit >= 0 && digit <= 7) {
2613               number = (number * 8) + digit;
2614               if (Character
2615                   .isDigit(currentCharacter = source[currentPosition++])) {
2616                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2617                   // Digit --> ignore last character
2618                   currentPosition--;
2619                 } else {
2620                   digit = Character.getNumericValue(currentCharacter);
2621                   if (digit >= 0 && digit <= 7) {
2622                     // has read \ZeroToThree OctalDigit OctalDigit
2623                     number = (number * 8) + digit;
2624                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2625                     // --> ignore last character
2626                     currentPosition--;
2627                   }
2628                 }
2629               } else { // has read \OctalDigit NonDigit--> ignore last
2630                 // character
2631                 currentPosition--;
2632               }
2633             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2634               // character
2635               currentPosition--;
2636             }
2637           } else { // has read \OctalDigit --> ignore last character
2638             currentPosition--;
2639           }
2640           if (number > 255)
2641             throw new InvalidInputException(INVALID_ESCAPE);
2642           currentCharacter = (char) number;
2643         }
2644     //else
2645     //     throw new InvalidInputException(INVALID_ESCAPE);
2646     }
2647   }
2648   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2649   //    return scanIdentifierOrKeyword( false );
2650   //  }
2651   public int scanIdentifierOrKeyword(boolean isVariable)
2652       throws InvalidInputException {
2653     //test keywords
2654     //first dispatch on the first char.
2655     //then the length. If there are several
2656     //keywors with the same length AND the same first char, then do another
2657     //disptach on the second char :-)...cool....but fast !
2658     useAssertAsAnIndentifier = false;
2659     while (getNextCharAsJavaIdentifierPart()) {
2660     };
2661     if (isVariable) {
2662       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2663       //        return TokenNamethis;
2664       //      }
2665       return TokenNameVariable;
2666     }
2667     int index, length;
2668     char[] data;
2669     char firstLetter;
2670     //    if (withoutUnicodePtr == 0)
2671     //quick test on length == 1 but not on length > 12 while most identifier
2672     //have a length which is <= 12...but there are lots of identifier with
2673     //only one char....
2674     //      {
2675     if ((length = currentPosition - startPosition) == 1)
2676       return TokenNameIdentifier;
2677     //  data = source;
2678     data = new char[length];
2679     index = startPosition;
2680     for (int i = 0; i < length; i++) {
2681       data[i] = Character.toLowerCase(source[index + i]);
2682     }
2683     index = 0;
2684     //    } else {
2685     //      if ((length = withoutUnicodePtr) == 1)
2686     //        return TokenNameIdentifier;
2687     //      // data = withoutUnicodeBuffer;
2688     //      data = new char[withoutUnicodeBuffer.length];
2689     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2690     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2691     //      }
2692     //      index = 1;
2693     //    }
2694     firstLetter = data[index];
2695     switch (firstLetter) {
2696       case '_' :
2697         switch (length) {
2698           case 8 :
2699             //__FILE__
2700             if ((data[++index] == '_') && (data[++index] == 'f')
2701                 && (data[++index] == 'i') && (data[++index] == 'l')
2702                 && (data[++index] == 'e') && (data[++index] == '_')
2703                 && (data[++index] == '_'))
2704               return TokenNameFILE;
2705             index = 0; //__LINE__
2706             if ((data[++index] == '_') && (data[++index] == 'l')
2707                 && (data[++index] == 'i') && (data[++index] == 'n')
2708                 && (data[++index] == 'e') && (data[++index] == '_')
2709                 && (data[++index] == '_'))
2710               return TokenNameLINE;
2711             break;
2712           case 9 :
2713             //__CLASS__
2714             if ((data[++index] == '_') && (data[++index] == 'c')
2715                 && (data[++index] == 'l') && (data[++index] == 'a')
2716                 && (data[++index] == 's') && (data[++index] == 's')
2717                 && (data[++index] == '_') && (data[++index] == '_'))
2718               return TokenNameCLASS_C;
2719             break;
2720           case 11 :
2721             //__METHOD__
2722             if ((data[++index] == '_') && (data[++index] == 'm')
2723                 && (data[++index] == 'e') && (data[++index] == 't')
2724                 && (data[++index] == 'h') && (data[++index] == 'o')
2725                 && (data[++index] == 'd') && (data[++index] == '_')
2726                 && (data[++index] == '_'))
2727               return TokenNameMETHOD_C;
2728             break;
2729           case 12 :
2730             //__FUNCTION__
2731             if ((data[++index] == '_') && (data[++index] == 'f')
2732                 && (data[++index] == 'u') && (data[++index] == 'n')
2733                 && (data[++index] == 'c') && (data[++index] == 't')
2734                 && (data[++index] == 'i') && (data[++index] == 'o')
2735                 && (data[++index] == 'n') && (data[++index] == '_')
2736                 && (data[++index] == '_'))
2737               return TokenNameFUNC_C;
2738             break;
2739         }
2740         return TokenNameIdentifier;
2741       case 'a' :
2742         // as and array abstract
2743         switch (length) {
2744           case 2 :
2745             //as
2746             if ((data[++index] == 's')) {
2747               return TokenNameas;
2748             } else {
2749               return TokenNameIdentifier;
2750             }
2751           case 3 :
2752             //and
2753             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2754               return TokenNameand;
2755             } else {
2756               return TokenNameIdentifier;
2757             }
2758           case 5 :
2759             // array
2760             if ((data[++index] == 'r') && (data[++index] == 'r')
2761                 && (data[++index] == 'a') && (data[++index] == 'y'))
2762               return TokenNamearray;
2763             else
2764               return TokenNameIdentifier;
2765           case 8 :
2766             if ((data[++index] == 'b') && (data[++index] == 's')
2767                 && (data[++index] == 't') && (data[++index] == 'r')
2768                 && (data[++index] == 'a') && (data[++index] == 'c')
2769                 && (data[++index] == 't'))
2770               return TokenNameabstract;
2771             else
2772               return TokenNameIdentifier;
2773           default :
2774             return TokenNameIdentifier;
2775         }
2776       case 'b' :
2777         //break
2778         switch (length) {
2779           case 5 :
2780             if ((data[++index] == 'r') && (data[++index] == 'e')
2781                 && (data[++index] == 'a') && (data[++index] == 'k'))
2782               return TokenNamebreak;
2783             else
2784               return TokenNameIdentifier;
2785           default :
2786             return TokenNameIdentifier;
2787         }
2788       case 'c' :
2789         //case catch class clone const continue
2790         switch (length) {
2791           case 4 :
2792             if ((data[++index] == 'a') && (data[++index] == 's')
2793                 && (data[++index] == 'e'))
2794               return TokenNamecase;
2795             else
2796               return TokenNameIdentifier;
2797           case 5 :
2798             if ((data[++index] == 'a') && (data[++index] == 't')
2799                 && (data[++index] == 'c') && (data[++index] == 'h'))
2800               return TokenNamecatch;
2801             index = 0;
2802             if ((data[++index] == 'l') && (data[++index] == 'a')
2803                 && (data[++index] == 's') && (data[++index] == 's'))
2804               return TokenNameclass;
2805             index = 0;
2806             if ((data[++index] == 'l') && (data[++index] == 'o')
2807                 && (data[++index] == 'n') && (data[++index] == 'e'))
2808               return TokenNameclone;
2809             index = 0;
2810             if ((data[++index] == 'o') && (data[++index] == 'n')
2811                 && (data[++index] == 's') && (data[++index] == 't'))
2812               return TokenNameconst;
2813             else
2814               return TokenNameIdentifier;
2815           case 8 :
2816             if ((data[++index] == 'o') && (data[++index] == 'n')
2817                 && (data[++index] == 't') && (data[++index] == 'i')
2818                 && (data[++index] == 'n') && (data[++index] == 'u')
2819                 && (data[++index] == 'e'))
2820               return TokenNamecontinue;
2821             else
2822               return TokenNameIdentifier;
2823           default :
2824             return TokenNameIdentifier;
2825         }
2826       case 'd' :
2827         // declare default do die
2828         // TODO delete define ==> no keyword !
2829         switch (length) {
2830           case 2 :
2831             if ((data[++index] == 'o'))
2832               return TokenNamedo;
2833             else
2834               return TokenNameIdentifier;
2835           //          case 6 :
2836           //            if ((data[++index] == 'e')
2837           //              && (data[++index] == 'f')
2838           //              && (data[++index] == 'i')
2839           //              && (data[++index] == 'n')
2840           //              && (data[++index] == 'e'))
2841           //              return TokenNamedefine;
2842           //            else
2843           //              return TokenNameIdentifier;
2844           case 7 :
2845             if ((data[++index] == 'e') && (data[++index] == 'c')
2846                 && (data[++index] == 'l') && (data[++index] == 'a')
2847                 && (data[++index] == 'r') && (data[++index] == 'e'))
2848               return TokenNamedeclare;
2849             index = 0;
2850             if ((data[++index] == 'e') && (data[++index] == 'f')
2851                 && (data[++index] == 'a') && (data[++index] == 'u')
2852                 && (data[++index] == 'l') && (data[++index] == 't'))
2853               return TokenNamedefault;
2854             else
2855               return TokenNameIdentifier;
2856           default :
2857             return TokenNameIdentifier;
2858         }
2859       case 'e' :
2860         //echo else exit elseif extends eval
2861         switch (length) {
2862           case 4 :
2863             if ((data[++index] == 'c') && (data[++index] == 'h')
2864                 && (data[++index] == 'o'))
2865               return TokenNameecho;
2866             else if ((data[index] == 'l') && (data[++index] == 's')
2867                 && (data[++index] == 'e'))
2868               return TokenNameelse;
2869             else if ((data[index] == 'x') && (data[++index] == 'i')
2870                 && (data[++index] == 't'))
2871               return TokenNameexit;
2872             else if ((data[index] == 'v') && (data[++index] == 'a')
2873                 && (data[++index] == 'l'))
2874               return TokenNameeval;
2875             else
2876               return TokenNameIdentifier;
2877           case 5 :
2878             // endif empty
2879             if ((data[++index] == 'n') && (data[++index] == 'd')
2880                 && (data[++index] == 'i') && (data[++index] == 'f'))
2881               return TokenNameendif;
2882             if ((data[index] == 'm') && (data[++index] == 'p')
2883                 && (data[++index] == 't') && (data[++index] == 'y'))
2884               return TokenNameempty;
2885             else
2886               return TokenNameIdentifier;
2887           case 6 :
2888             // endfor
2889             if ((data[++index] == 'n') && (data[++index] == 'd')
2890                 && (data[++index] == 'f') && (data[++index] == 'o')
2891                 && (data[++index] == 'r'))
2892               return TokenNameendfor;
2893             else if ((data[index] == 'l') && (data[++index] == 's')
2894                 && (data[++index] == 'e') && (data[++index] == 'i')
2895                 && (data[++index] == 'f'))
2896               return TokenNameelseif;
2897             else
2898               return TokenNameIdentifier;
2899           case 7 :
2900             if ((data[++index] == 'x') && (data[++index] == 't')
2901                 && (data[++index] == 'e') && (data[++index] == 'n')
2902                 && (data[++index] == 'd') && (data[++index] == 's'))
2903               return TokenNameextends;
2904             else
2905               return TokenNameIdentifier;
2906           case 8 :
2907             // endwhile
2908             if ((data[++index] == 'n') && (data[++index] == 'd')
2909                 && (data[++index] == 'w') && (data[++index] == 'h')
2910                 && (data[++index] == 'i') && (data[++index] == 'l')
2911                 && (data[++index] == 'e'))
2912               return TokenNameendwhile;
2913             else
2914               return TokenNameIdentifier;
2915           case 9 :
2916             // endswitch
2917             if ((data[++index] == 'n') && (data[++index] == 'd')
2918                 && (data[++index] == 's') && (data[++index] == 'w')
2919                 && (data[++index] == 'i') && (data[++index] == 't')
2920                 && (data[++index] == 'c') && (data[++index] == 'h'))
2921               return TokenNameendswitch;
2922             else
2923               return TokenNameIdentifier;
2924           case 10 :
2925             // enddeclare
2926             if ((data[++index] == 'n') && (data[++index] == 'd')
2927                 && (data[++index] == 'd') && (data[++index] == 'e')
2928                 && (data[++index] == 'c') && (data[++index] == 'l')
2929                 && (data[++index] == 'a') && (data[++index] == 'r')
2930                 && (data[++index] == 'e'))
2931               return TokenNameendforeach;
2932             index = 0;
2933             if ((data[++index] == 'n') // endforeach
2934                 && (data[++index] == 'd') && (data[++index] == 'f')
2935                 && (data[++index] == 'o') && (data[++index] == 'r')
2936                 && (data[++index] == 'e') && (data[++index] == 'a')
2937                 && (data[++index] == 'c') && (data[++index] == 'h'))
2938               return TokenNameendforeach;
2939             else
2940               return TokenNameIdentifier;
2941           default :
2942             return TokenNameIdentifier;
2943         }
2944       case 'f' :
2945         //for false final function
2946         switch (length) {
2947           case 3 :
2948             if ((data[++index] == 'o') && (data[++index] == 'r'))
2949               return TokenNamefor;
2950             else
2951               return TokenNameIdentifier;
2952           case 5 :
2953             //            if ((data[++index] == 'a') && (data[++index] == 'l')
2954             //                && (data[++index] == 's') && (data[++index] == 'e'))
2955             //              return TokenNamefalse;
2956             if ((data[++index] == 'i') && (data[++index] == 'n')
2957                 && (data[++index] == 'a') && (data[++index] == 'l'))
2958               return TokenNamefinal;
2959             else
2960               return TokenNameIdentifier;
2961           case 7 :
2962             // foreach
2963             if ((data[++index] == 'o') && (data[++index] == 'r')
2964                 && (data[++index] == 'e') && (data[++index] == 'a')
2965                 && (data[++index] == 'c') && (data[++index] == 'h'))
2966               return TokenNameforeach;
2967             else
2968               return TokenNameIdentifier;
2969           case 8 :
2970             // function
2971             if ((data[++index] == 'u') && (data[++index] == 'n')
2972                 && (data[++index] == 'c') && (data[++index] == 't')
2973                 && (data[++index] == 'i') && (data[++index] == 'o')
2974                 && (data[++index] == 'n'))
2975               return TokenNamefunction;
2976             else
2977               return TokenNameIdentifier;
2978           default :
2979             return TokenNameIdentifier;
2980         }
2981       case 'g' :
2982         //global
2983         if (length == 6) {
2984           if ((data[++index] == 'l') && (data[++index] == 'o')
2985               && (data[++index] == 'b') && (data[++index] == 'a')
2986               && (data[++index] == 'l')) {
2987             return TokenNameglobal;
2988           }
2989         }
2990         return TokenNameIdentifier;
2991       case 'i' :
2992         //if int isset include include_once instanceof interface implements
2993         switch (length) {
2994           case 2 :
2995             if (data[++index] == 'f')
2996               return TokenNameif;
2997             else
2998               return TokenNameIdentifier;
2999           //          case 3 :
3000           //            if ((data[++index] == 'n') && (data[++index] == 't'))
3001           //              return TokenNameint;
3002           //            else
3003           //              return TokenNameIdentifier;
3004           case 5 :
3005             if ((data[++index] == 's') && (data[++index] == 's')
3006                 && (data[++index] == 'e') && (data[++index] == 't'))
3007               return TokenNameisset;
3008             else
3009               return TokenNameIdentifier;
3010           case 7 :
3011             if ((data[++index] == 'n') && (data[++index] == 'c')
3012                 && (data[++index] == 'l') && (data[++index] == 'u')
3013                 && (data[++index] == 'd') && (data[++index] == 'e'))
3014               return TokenNameinclude;
3015             else
3016               return TokenNameIdentifier;
3017           case 9 :
3018             // interface
3019             if ((data[++index] == 'n') && (data[++index] == 't')
3020                 && (data[++index] == 'e') && (data[++index] == 'r')
3021                 && (data[++index] == 'f') && (data[++index] == 'a')
3022                 && (data[++index] == 'c') && (data[++index] == 'e'))
3023               return TokenNameinterface;
3024             else
3025               return TokenNameIdentifier;
3026           case 10 :
3027             // instanceof
3028             if ((data[++index] == 'n') && (data[++index] == 's')
3029                 && (data[++index] == 't') && (data[++index] == 'a')
3030                 && (data[++index] == 'n') && (data[++index] == 'c')
3031                 && (data[++index] == 'e') && (data[++index] == 'o')
3032                 && (data[++index] == 'f'))
3033               return TokenNameinstanceof;
3034             if ((data[index] == 'm') && (data[++index] == 'p')
3035                 && (data[++index] == 'l') && (data[++index] == 'e')
3036                 && (data[++index] == 'm') && (data[++index] == 'e')
3037                 && (data[++index] == 'n') && (data[++index] == 't')
3038                 && (data[++index] == 's'))
3039               return TokenNameimplements;
3040             else
3041               return TokenNameIdentifier;
3042           case 12 :
3043             if ((data[++index] == 'n') && (data[++index] == 'c')
3044                 && (data[++index] == 'l') && (data[++index] == 'u')
3045                 && (data[++index] == 'd') && (data[++index] == 'e')
3046                 && (data[++index] == '_') && (data[++index] == 'o')
3047                 && (data[++index] == 'n') && (data[++index] == 'c')
3048                 && (data[++index] == 'e'))
3049               return TokenNameinclude_once;
3050             else
3051               return TokenNameIdentifier;
3052           default :
3053             return TokenNameIdentifier;
3054         }
3055       case 'l' :
3056         //list
3057         if (length == 4) {
3058           if ((data[++index] == 'i') && (data[++index] == 's')
3059               && (data[++index] == 't')) {
3060             return TokenNamelist;
3061           }
3062         }
3063         return TokenNameIdentifier;
3064       case 'n' :
3065         // new null
3066         switch (length) {
3067           case 3 :
3068             if ((data[++index] == 'e') && (data[++index] == 'w'))
3069               return TokenNamenew;
3070             else
3071               return TokenNameIdentifier;
3072           //          case 4 :
3073           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3074           //                && (data[++index] == 'l'))
3075           //              return TokenNamenull;
3076           //            else
3077           //              return TokenNameIdentifier;
3078           default :
3079             return TokenNameIdentifier;
3080         }
3081       case 'o' :
3082         // or old_function
3083         if (length == 2) {
3084           if (data[++index] == 'r') {
3085             return TokenNameor;
3086           }
3087         }
3088         //        if (length == 12) {
3089         //          if ((data[++index] == 'l')
3090         //            && (data[++index] == 'd')
3091         //            && (data[++index] == '_')
3092         //            && (data[++index] == 'f')
3093         //            && (data[++index] == 'u')
3094         //            && (data[++index] == 'n')
3095         //            && (data[++index] == 'c')
3096         //            && (data[++index] == 't')
3097         //            && (data[++index] == 'i')
3098         //            && (data[++index] == 'o')
3099         //            && (data[++index] == 'n')) {
3100         //            return TokenNameold_function;
3101         //          }
3102         //        }
3103         return TokenNameIdentifier;
3104       case 'p' :
3105         // print public private protected
3106         switch (length) {
3107           case 5 :
3108             if ((data[++index] == 'r') && (data[++index] == 'i')
3109                 && (data[++index] == 'n') && (data[++index] == 't')) {
3110               return TokenNameprint;
3111             } else
3112               return TokenNameIdentifier;
3113           case 6 :
3114             if ((data[++index] == 'u') && (data[++index] == 'b')
3115                 && (data[++index] == 'l') && (data[++index] == 'i')
3116                 && (data[++index] == 'c')) {
3117               return TokenNamepublic;
3118             } else
3119               return TokenNameIdentifier;
3120           case 7 :
3121             if ((data[++index] == 'r') && (data[++index] == 'i')
3122                 && (data[++index] == 'v') && (data[++index] == 'a')
3123                 && (data[++index] == 't') && (data[++index] == 'e')) {
3124               return TokenNameprivate;
3125             } else
3126               return TokenNameIdentifier;
3127           case 9 :
3128             if ((data[++index] == 'r') && (data[++index] == 'o')
3129                 && (data[++index] == 't') && (data[++index] == 'e')
3130                 && (data[++index] == 'c') && (data[++index] == 't')
3131                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3132               return TokenNameprotected;
3133             } else
3134               return TokenNameIdentifier;
3135         }
3136         return TokenNameIdentifier;
3137       case 'r' :
3138         //return require require_once
3139         if (length == 6) {
3140           if ((data[++index] == 'e') && (data[++index] == 't')
3141               && (data[++index] == 'u') && (data[++index] == 'r')
3142               && (data[++index] == 'n')) {
3143             return TokenNamereturn;
3144           }
3145         } else if (length == 7) {
3146           if ((data[++index] == 'e') && (data[++index] == 'q')
3147               && (data[++index] == 'u') && (data[++index] == 'i')
3148               && (data[++index] == 'r') && (data[++index] == 'e')) {
3149             return TokenNamerequire;
3150           }
3151         } else if (length == 12) {
3152           if ((data[++index] == 'e') && (data[++index] == 'q')
3153               && (data[++index] == 'u') && (data[++index] == 'i')
3154               && (data[++index] == 'r') && (data[++index] == 'e')
3155               && (data[++index] == '_') && (data[++index] == 'o')
3156               && (data[++index] == 'n') && (data[++index] == 'c')
3157               && (data[++index] == 'e')) {
3158             return TokenNamerequire_once;
3159           }
3160         } else
3161           return TokenNameIdentifier;
3162       case 's' :
3163         //static switch
3164         switch (length) {
3165           case 6 :
3166             if (data[++index] == 't')
3167               if ((data[++index] == 'a') && (data[++index] == 't')
3168                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3169                 return TokenNamestatic;
3170               } else
3171                 return TokenNameIdentifier;
3172             else if ((data[index] == 'w') && (data[++index] == 'i')
3173                 && (data[++index] == 't') && (data[++index] == 'c')
3174                 && (data[++index] == 'h'))
3175               return TokenNameswitch;
3176             else
3177               return TokenNameIdentifier;
3178           default :
3179             return TokenNameIdentifier;
3180         }
3181       case 't' :
3182         // try true throw
3183         switch (length) {
3184           case 3 :
3185             if ((data[++index] == 'r') && (data[++index] == 'y'))
3186               return TokenNametry;
3187             else
3188               return TokenNameIdentifier;
3189           //          case 4 :
3190           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3191           //                && (data[++index] == 'e'))
3192           //              return TokenNametrue;
3193           //            else
3194           //              return TokenNameIdentifier;
3195           case 5 :
3196             if ((data[++index] == 'h') && (data[++index] == 'r')
3197                 && (data[++index] == 'o') && (data[++index] == 'w'))
3198               return TokenNamethrow;
3199             else
3200               return TokenNameIdentifier;
3201           default :
3202             return TokenNameIdentifier;
3203         }
3204       case 'u' :
3205         //use unset
3206         switch (length) {
3207           case 3 :
3208             if ((data[++index] == 's') && (data[++index] == 'e'))
3209               return TokenNameuse;
3210             else
3211               return TokenNameIdentifier;
3212           case 5 :
3213             if ((data[++index] == 'n') && (data[++index] == 's')
3214                 && (data[++index] == 'e') && (data[++index] == 't'))
3215               return TokenNameunset;
3216             else
3217               return TokenNameIdentifier;
3218           default :
3219             return TokenNameIdentifier;
3220         }
3221       case 'v' :
3222         //var
3223         switch (length) {
3224           case 3 :
3225             if ((data[++index] == 'a') && (data[++index] == 'r'))
3226               return TokenNamevar;
3227             else
3228               return TokenNameIdentifier;
3229           default :
3230             return TokenNameIdentifier;
3231         }
3232       case 'w' :
3233         //while
3234         switch (length) {
3235           case 5 :
3236             if ((data[++index] == 'h') && (data[++index] == 'i')
3237                 && (data[++index] == 'l') && (data[++index] == 'e'))
3238               return TokenNamewhile;
3239             else
3240               return TokenNameIdentifier;
3241           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3242           // (data[++index]=='e') && (data[++index]=='f')&&
3243           // (data[++index]=='p'))
3244           //return TokenNamewidefp ;
3245           //else
3246           //return TokenNameIdentifier;
3247           default :
3248             return TokenNameIdentifier;
3249         }
3250       case 'x' :
3251         //xor
3252         switch (length) {
3253           case 3 :
3254             if ((data[++index] == 'o') && (data[++index] == 'r'))
3255               return TokenNamexor;
3256             else
3257               return TokenNameIdentifier;
3258           default :
3259             return TokenNameIdentifier;
3260         }
3261       default :
3262         return TokenNameIdentifier;
3263     }
3264   }
3265   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3266     //when entering this method the currentCharacter is the firt
3267     //digit of the number , i.e. it may be preceeded by a . when
3268     //dotPrefix is true
3269     boolean floating = dotPrefix;
3270     if ((!dotPrefix) && (currentCharacter == '0')) {
3271       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3272         //force the first char of the hexa number do exist...
3273         // consume next character
3274         unicodeAsBackSlash = false;
3275         currentCharacter = source[currentPosition++];
3276         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3277         //          && (source[currentPosition] == 'u')) {
3278         //          getNextUnicodeChar();
3279         //        } else {
3280         //          if (withoutUnicodePtr != 0) {
3281         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3282         //          }
3283         //        }
3284         if (Character.digit(currentCharacter, 16) == -1)
3285           throw new InvalidInputException(INVALID_HEXA);
3286         //---end forcing--
3287         while (getNextCharAsDigit(16)) {
3288         };
3289         //        if (getNextChar('l', 'L') >= 0)
3290         //          return TokenNameLongLiteral;
3291         //        else
3292         return TokenNameIntegerLiteral;
3293       }
3294       //there is x or X in the number
3295       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3296       // 00078.0 is true !!!!! crazy language
3297       if (getNextCharAsDigit()) {
3298         //-------------potential octal-----------------
3299         while (getNextCharAsDigit()) {
3300         };
3301         //        if (getNextChar('l', 'L') >= 0) {
3302         //          return TokenNameLongLiteral;
3303         //        }
3304         //
3305         //        if (getNextChar('f', 'F') >= 0) {
3306         //          return TokenNameFloatingPointLiteral;
3307         //        }
3308         if (getNextChar('d', 'D') >= 0) {
3309           return TokenNameDoubleLiteral;
3310         } else { //make the distinction between octal and float ....
3311           if (getNextChar('.')) { //bingo ! ....
3312             while (getNextCharAsDigit()) {
3313             };
3314             if (getNextChar('e', 'E') >= 0) {
3315               // consume next character
3316               unicodeAsBackSlash = false;
3317               currentCharacter = source[currentPosition++];
3318               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3319               //                && (source[currentPosition] == 'u')) {
3320               //                getNextUnicodeChar();
3321               //              } else {
3322               //                if (withoutUnicodePtr != 0) {
3323               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3324               //                }
3325               //              }
3326               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3327                 // consume next character
3328                 unicodeAsBackSlash = false;
3329                 currentCharacter = source[currentPosition++];
3330                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3331                 //                  && (source[currentPosition] == 'u')) {
3332                 //                  getNextUnicodeChar();
3333                 //                } else {
3334                 //                  if (withoutUnicodePtr != 0) {
3335                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3336                 //                      currentCharacter;
3337                 //                  }
3338                 //                }
3339               }
3340               if (!Character.isDigit(currentCharacter))
3341                 throw new InvalidInputException(INVALID_FLOAT);
3342               while (getNextCharAsDigit()) {
3343               };
3344             }
3345             //            if (getNextChar('f', 'F') >= 0)
3346             //              return TokenNameFloatingPointLiteral;
3347             getNextChar('d', 'D'); //jump over potential d or D
3348             return TokenNameDoubleLiteral;
3349           } else {
3350             return TokenNameIntegerLiteral;
3351           }
3352         }
3353       } else {
3354         /* carry on */
3355       }
3356     }
3357     while (getNextCharAsDigit()) {
3358     };
3359     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3360     //      return TokenNameLongLiteral;
3361     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3362       while (getNextCharAsDigit()) {
3363       };
3364       floating = true;
3365     }
3366     //if floating is true both exponant and suffix may be optional
3367     if (getNextChar('e', 'E') >= 0) {
3368       floating = true;
3369       // consume next character
3370       unicodeAsBackSlash = false;
3371       currentCharacter = source[currentPosition++];
3372       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3373       //        && (source[currentPosition] == 'u')) {
3374       //        getNextUnicodeChar();
3375       //      } else {
3376       //        if (withoutUnicodePtr != 0) {
3377       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3378       //        }
3379       //      }
3380       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3381         // next
3382         // character
3383         unicodeAsBackSlash = false;
3384         currentCharacter = source[currentPosition++];
3385         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3386         //          && (source[currentPosition] == 'u')) {
3387         //          getNextUnicodeChar();
3388         //        } else {
3389         //          if (withoutUnicodePtr != 0) {
3390         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3391         //          }
3392         //        }
3393       }
3394       if (!Character.isDigit(currentCharacter))
3395         throw new InvalidInputException(INVALID_FLOAT);
3396       while (getNextCharAsDigit()) {
3397       };
3398     }
3399     if (getNextChar('d', 'D') >= 0)
3400       return TokenNameDoubleLiteral;
3401     //    if (getNextChar('f', 'F') >= 0)
3402     //      return TokenNameFloatingPointLiteral;
3403     //the long flag has been tested before
3404     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3405   }
3406   /**
3407    * Search the line number corresponding to a specific position
3408    *
3409    */
3410   public final int getLineNumber(int position) {
3411     if (lineEnds == null)
3412       return 1;
3413     int length = linePtr + 1;
3414     if (length == 0)
3415       return 1;
3416     int g = 0, d = length - 1;
3417     int m = 0;
3418     while (g <= d) {
3419       m = (g + d) / 2;
3420       if (position < lineEnds[m]) {
3421         d = m - 1;
3422       } else if (position > lineEnds[m]) {
3423         g = m + 1;
3424       } else {
3425         return m + 1;
3426       }
3427     }
3428     if (position < lineEnds[m]) {
3429       return m + 1;
3430     }
3431     return m + 2;
3432   }
3433   public void setPHPMode(boolean mode) {
3434     phpMode = mode;
3435   }
3436   public final void setSource(char[] source) {
3437     //the source-buffer is set to sourceString
3438     if (source == null) {
3439       this.source = new char[0];
3440     } else {
3441       this.source = source;
3442     }
3443     startPosition = -1;
3444     initialPosition = currentPosition = 0;
3445     containsAssertKeyword = false;
3446     withoutUnicodeBuffer = new char[this.source.length];
3447   }
3448   public String toString() {
3449     if (startPosition == source.length)
3450       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3451     if (currentPosition > source.length)
3452       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3453     char front[] = new char[startPosition];
3454     System.arraycopy(source, 0, front, 0, startPosition);
3455     int middleLength = (currentPosition - 1) - startPosition + 1;
3456     char middle[];
3457     if (middleLength > -1) {
3458       middle = new char[middleLength];
3459       System.arraycopy(source, startPosition, middle, 0, middleLength);
3460     } else {
3461       middle = new char[0];
3462     }
3463     char end[] = new char[source.length - (currentPosition - 1)];
3464     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3465         - (currentPosition - 1) - 1);
3466     return new String(front)
3467         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3468         + new String(middle)
3469         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3470         + new String(end);
3471   }
3472   public final String toStringAction(int act) {
3473     switch (act) {
3474       case TokenNameERROR :
3475         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3476       // //$NON-NLS-1$
3477       case TokenNameINLINE_HTML :
3478         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479       case TokenNameIdentifier :
3480         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3481       case TokenNameVariable :
3482         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3483       case TokenNameabstract :
3484         return "abstract"; //$NON-NLS-1$
3485       case TokenNameand :
3486         return "AND"; //$NON-NLS-1$
3487       case TokenNamearray :
3488         return "array"; //$NON-NLS-1$
3489       case TokenNameas :
3490         return "as"; //$NON-NLS-1$
3491       case TokenNamebreak :
3492         return "break"; //$NON-NLS-1$
3493       case TokenNamecase :
3494         return "case"; //$NON-NLS-1$
3495       case TokenNameclass :
3496         return "class"; //$NON-NLS-1$
3497       case TokenNamecatch :
3498         return "catch"; //$NON-NLS-1$
3499       case TokenNameclone :
3500         //$NON-NLS-1$
3501         return "clone";
3502       case TokenNameconst :
3503         //$NON-NLS-1$
3504         return "const";
3505       case TokenNamecontinue :
3506         return "continue"; //$NON-NLS-1$
3507       case TokenNamedefault :
3508         return "default"; //$NON-NLS-1$
3509       //      case TokenNamedefine :
3510       //        return "define"; //$NON-NLS-1$
3511       case TokenNamedo :
3512         return "do"; //$NON-NLS-1$
3513       case TokenNameecho :
3514         return "echo"; //$NON-NLS-1$
3515       case TokenNameelse :
3516         return "else"; //$NON-NLS-1$
3517       case TokenNameelseif :
3518         return "elseif"; //$NON-NLS-1$
3519       case TokenNameendfor :
3520         return "endfor"; //$NON-NLS-1$
3521       case TokenNameendforeach :
3522         return "endforeach"; //$NON-NLS-1$
3523       case TokenNameendif :
3524         return "endif"; //$NON-NLS-1$
3525       case TokenNameendswitch :
3526         return "endswitch"; //$NON-NLS-1$
3527       case TokenNameendwhile :
3528         return "endwhile"; //$NON-NLS-1$
3529       case TokenNameextends :
3530         return "extends"; //$NON-NLS-1$
3531       //      case TokenNamefalse :
3532       //        return "false"; //$NON-NLS-1$
3533       case TokenNamefinal :
3534         return "final"; //$NON-NLS-1$
3535       case TokenNamefor :
3536         return "for"; //$NON-NLS-1$
3537       case TokenNameforeach :
3538         return "foreach"; //$NON-NLS-1$
3539       case TokenNamefunction :
3540         return "function"; //$NON-NLS-1$
3541       case TokenNameglobal :
3542         return "global"; //$NON-NLS-1$
3543       case TokenNameif :
3544         return "if"; //$NON-NLS-1$
3545       case TokenNameimplements :
3546         return "implements"; //$NON-NLS-1$
3547       case TokenNameinclude :
3548         return "include"; //$NON-NLS-1$
3549       case TokenNameinclude_once :
3550         return "include_once"; //$NON-NLS-1$
3551       case TokenNameinstanceof :
3552         return "instanceof"; //$NON-NLS-1$
3553       case TokenNameinterface :
3554         return "interface"; //$NON-NLS-1$
3555       case TokenNameisset :
3556         return "isset"; //$NON-NLS-1$
3557       case TokenNamelist :
3558         return "list"; //$NON-NLS-1$
3559       case TokenNamenew :
3560         return "new"; //$NON-NLS-1$
3561       //      case TokenNamenull :
3562       //        return "null"; //$NON-NLS-1$
3563       case TokenNameor :
3564         return "OR"; //$NON-NLS-1$
3565       case TokenNameprint :
3566         return "print"; //$NON-NLS-1$
3567       case TokenNameprivate :
3568         return "private"; //$NON-NLS-1$
3569       case TokenNameprotected :
3570         return "protected"; //$NON-NLS-1$
3571       case TokenNamepublic :
3572         return "public"; //$NON-NLS-1$
3573       case TokenNamerequire :
3574         return "require"; //$NON-NLS-1$
3575       case TokenNamerequire_once :
3576         return "require_once"; //$NON-NLS-1$
3577       case TokenNamereturn :
3578         return "return"; //$NON-NLS-1$
3579       case TokenNamestatic :
3580         return "static"; //$NON-NLS-1$
3581       case TokenNameswitch :
3582         return "switch"; //$NON-NLS-1$
3583       //      case TokenNametrue :
3584       //        return "true"; //$NON-NLS-1$
3585       case TokenNameunset :
3586         return "unset"; //$NON-NLS-1$
3587       case TokenNamevar :
3588         return "var"; //$NON-NLS-1$
3589       case TokenNamewhile :
3590         return "while"; //$NON-NLS-1$
3591       case TokenNamexor :
3592         return "XOR"; //$NON-NLS-1$
3593       //      case TokenNamethis :
3594       //        return "$this"; //$NON-NLS-1$
3595       case TokenNameIntegerLiteral :
3596         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3597       case TokenNameDoubleLiteral :
3598         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3599       case TokenNameStringLiteral :
3600         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3601       case TokenNameStringConstant :
3602         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3603       case TokenNameStringInterpolated :
3604         return "StringInterpolated(" + new String(getCurrentTokenSource())
3605             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3606       case TokenNameHEREDOC :
3607         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3608       case TokenNamePLUS_PLUS :
3609         return "++"; //$NON-NLS-1$
3610       case TokenNameMINUS_MINUS :
3611         return "--"; //$NON-NLS-1$
3612       case TokenNameEQUAL_EQUAL :
3613         return "=="; //$NON-NLS-1$
3614       case TokenNameEQUAL_EQUAL_EQUAL :
3615         return "==="; //$NON-NLS-1$
3616       case TokenNameEQUAL_GREATER :
3617         return "=>"; //$NON-NLS-1$
3618       case TokenNameLESS_EQUAL :
3619         return "<="; //$NON-NLS-1$
3620       case TokenNameGREATER_EQUAL :
3621         return ">="; //$NON-NLS-1$
3622       case TokenNameNOT_EQUAL :
3623         return "!="; //$NON-NLS-1$
3624       case TokenNameNOT_EQUAL_EQUAL :
3625         return "!=="; //$NON-NLS-1$
3626       case TokenNameLEFT_SHIFT :
3627         return "<<"; //$NON-NLS-1$
3628       case TokenNameRIGHT_SHIFT :
3629         return ">>"; //$NON-NLS-1$
3630       case TokenNamePLUS_EQUAL :
3631         return "+="; //$NON-NLS-1$
3632       case TokenNameMINUS_EQUAL :
3633         return "-="; //$NON-NLS-1$
3634       case TokenNameMULTIPLY_EQUAL :
3635         return "*="; //$NON-NLS-1$
3636       case TokenNameDIVIDE_EQUAL :
3637         return "/="; //$NON-NLS-1$
3638       case TokenNameAND_EQUAL :
3639         return "&="; //$NON-NLS-1$
3640       case TokenNameOR_EQUAL :
3641         return "|="; //$NON-NLS-1$
3642       case TokenNameXOR_EQUAL :
3643         return "^="; //$NON-NLS-1$
3644       case TokenNameREMAINDER_EQUAL :
3645         return "%="; //$NON-NLS-1$
3646       case TokenNameDOT_EQUAL :
3647         return ".="; //$NON-NLS-1$
3648       case TokenNameLEFT_SHIFT_EQUAL :
3649         return "<<="; //$NON-NLS-1$
3650       case TokenNameRIGHT_SHIFT_EQUAL :
3651         return ">>="; //$NON-NLS-1$
3652       case TokenNameOR_OR :
3653         return "||"; //$NON-NLS-1$
3654       case TokenNameAND_AND :
3655         return "&&"; //$NON-NLS-1$
3656       case TokenNamePLUS :
3657         return "+"; //$NON-NLS-1$
3658       case TokenNameMINUS :
3659         return "-"; //$NON-NLS-1$
3660       case TokenNameMINUS_GREATER :
3661         return "->";
3662       case TokenNameNOT :
3663         return "!"; //$NON-NLS-1$
3664       case TokenNameREMAINDER :
3665         return "%"; //$NON-NLS-1$
3666       case TokenNameXOR :
3667         return "^"; //$NON-NLS-1$
3668       case TokenNameAND :
3669         return "&"; //$NON-NLS-1$
3670       case TokenNameMULTIPLY :
3671         return "*"; //$NON-NLS-1$
3672       case TokenNameOR :
3673         return "|"; //$NON-NLS-1$
3674       case TokenNameTWIDDLE :
3675         return "~"; //$NON-NLS-1$
3676       case TokenNameTWIDDLE_EQUAL :
3677         return "~="; //$NON-NLS-1$
3678       case TokenNameDIVIDE :
3679         return "/"; //$NON-NLS-1$
3680       case TokenNameGREATER :
3681         return ">"; //$NON-NLS-1$
3682       case TokenNameLESS :
3683         return "<"; //$NON-NLS-1$
3684       case TokenNameLPAREN :
3685         return "("; //$NON-NLS-1$
3686       case TokenNameRPAREN :
3687         return ")"; //$NON-NLS-1$
3688       case TokenNameLBRACE :
3689         return "{"; //$NON-NLS-1$
3690       case TokenNameRBRACE :
3691         return "}"; //$NON-NLS-1$
3692       case TokenNameLBRACKET :
3693         return "["; //$NON-NLS-1$
3694       case TokenNameRBRACKET :
3695         return "]"; //$NON-NLS-1$
3696       case TokenNameSEMICOLON :
3697         return ";"; //$NON-NLS-1$
3698       case TokenNameQUESTION :
3699         return "?"; //$NON-NLS-1$
3700       case TokenNameCOLON :
3701         return ":"; //$NON-NLS-1$
3702       case TokenNameCOMMA :
3703         return ","; //$NON-NLS-1$
3704       case TokenNameDOT :
3705         return "."; //$NON-NLS-1$
3706       case TokenNameEQUAL :
3707         return "="; //$NON-NLS-1$
3708       case TokenNameAT :
3709         return "@";
3710       case TokenNameDOLLAR :
3711         return "$";
3712       //      case TokenNameDOLLAR_LBRACE :
3713       //        return "${";
3714       case TokenNameEOF :
3715         return "EOF"; //$NON-NLS-1$
3716       case TokenNameWHITESPACE :
3717         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3718       case TokenNameCOMMENT_LINE :
3719         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3720       case TokenNameCOMMENT_BLOCK :
3721         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3722       case TokenNameCOMMENT_PHPDOC :
3723         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3724         //      case TokenNameHTML :
3725         //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3726         // //$NON-NLS-1$
3727       case TokenNameFILE :
3728         return "__FILE__"; //$NON-NLS-1$
3729       case TokenNameLINE :
3730         return "__LINE__"; //$NON-NLS-1$
3731       case TokenNameCLASS_C :
3732         return "__CLASS__"; //$NON-NLS-1$
3733       case TokenNameMETHOD_C :
3734         return "__METHOD__"; //$NON-NLS-1$
3735       case TokenNameFUNC_C :
3736         return "__FUNCTION__"; //$NON-NLS-1
3737       case TokenNameboolCAST :
3738         return "( bool )"; //$NON-NLS-1$
3739       case TokenNameintCAST :
3740         return "( int )"; //$NON-NLS-1$
3741       case TokenNamedoubleCAST :
3742         return "( double )"; //$NON-NLS-1$
3743       case TokenNameobjectCAST :
3744         return "( object )"; //$NON-NLS-1$
3745       case TokenNamestringCAST :
3746         return "( string )"; //$NON-NLS-1$
3747       default :
3748         return "not-a-token(" + (new Integer(act)) + ") "
3749             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3750     }
3751   }
3752   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3753       boolean checkNonExternalizedStringLiterals) {
3754     this(tokenizeComments, tokenizeWhiteSpace,
3755         checkNonExternalizedStringLiterals, false);
3756   }
3757   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3758       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3759     this.eofPosition = Integer.MAX_VALUE;
3760     this.tokenizeComments = tokenizeComments;
3761     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3762     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3763     this.assertMode = assertMode;
3764   }
3765   private void checkNonExternalizeString() throws InvalidInputException {
3766     if (currentLine == null)
3767       return;
3768     parseTags(currentLine);
3769   }
3770   private void parseTags(NLSLine line) throws InvalidInputException {
3771     String s = new String(getCurrentTokenSource());
3772     int pos = s.indexOf(TAG_PREFIX);
3773     int lineLength = line.size();
3774     while (pos != -1) {
3775       int start = pos + TAG_PREFIX_LENGTH;
3776       int end = s.indexOf(TAG_POSTFIX, start);
3777       String index = s.substring(start, end);
3778       int i = 0;
3779       try {
3780         i = Integer.parseInt(index) - 1;
3781         // Tags are one based not zero based.
3782       } catch (NumberFormatException e) {
3783         i = -1; // we don't want to consider this as a valid NLS tag
3784       }
3785       if (line.exists(i)) {
3786         line.set(i, null);
3787       }
3788       pos = s.indexOf(TAG_PREFIX, start);
3789     }
3790     this.nonNLSStrings = new StringLiteral[lineLength];
3791     int nonNLSCounter = 0;
3792     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3793       StringLiteral literal = (StringLiteral) iterator.next();
3794       if (literal != null) {
3795         this.nonNLSStrings[nonNLSCounter++] = literal;
3796       }
3797     }
3798     if (nonNLSCounter == 0) {
3799       this.nonNLSStrings = null;
3800       currentLine = null;
3801       return;
3802     }
3803     this.wasNonExternalizedStringLiteral = true;
3804     if (nonNLSCounter != lineLength) {
3805       System.arraycopy(this.nonNLSStrings, 0,
3806           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3807           nonNLSCounter);
3808     }
3809     currentLine = null;
3810   }
3811   public final void scanEscapeCharacter() throws InvalidInputException {
3812     // the string with "\\u" is a legal string of two chars \ and u
3813     //thus we use a direct access to the source (for regular cases).
3814     if (unicodeAsBackSlash) {
3815       // consume next character
3816       unicodeAsBackSlash = false;
3817       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3818       // (source[currentPosition] == 'u')) {
3819       //                                getNextUnicodeChar();
3820       //                        } else {
3821       if (withoutUnicodePtr != 0) {
3822         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3823         //                              }
3824       }
3825     } else
3826       currentCharacter = source[currentPosition++];
3827     switch (currentCharacter) {
3828       case 'b' :
3829         currentCharacter = '\b';
3830         break;
3831       case 't' :
3832         currentCharacter = '\t';
3833         break;
3834       case 'n' :
3835         currentCharacter = '\n';
3836         break;
3837       case 'f' :
3838         currentCharacter = '\f';
3839         break;
3840       case 'r' :
3841         currentCharacter = '\r';
3842         break;
3843       case '\"' :
3844         currentCharacter = '\"';
3845         break;
3846       case '\'' :
3847         currentCharacter = '\'';
3848         break;
3849       case '\\' :
3850         currentCharacter = '\\';
3851         break;
3852       default :
3853         // -----------octal escape--------------
3854         // OctalDigit
3855         // OctalDigit OctalDigit
3856         // ZeroToThree OctalDigit OctalDigit
3857         int number = Character.getNumericValue(currentCharacter);
3858         if (number >= 0 && number <= 7) {
3859           boolean zeroToThreeNot = number > 3;
3860           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3861             int digit = Character.getNumericValue(currentCharacter);
3862             if (digit >= 0 && digit <= 7) {
3863               number = (number * 8) + digit;
3864               if (Character
3865                   .isDigit(currentCharacter = source[currentPosition++])) {
3866                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3867                   // Digit --> ignore last character
3868                   currentPosition--;
3869                 } else {
3870                   digit = Character.getNumericValue(currentCharacter);
3871                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3872                     // OctalDigit OctalDigit
3873                     number = (number * 8) + digit;
3874                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3875                     // --> ignore last character
3876                     currentPosition--;
3877                   }
3878                 }
3879               } else { // has read \OctalDigit NonDigit--> ignore last
3880                 // character
3881                 currentPosition--;
3882               }
3883             } else { // has read \OctalDigit NonOctalDigit--> ignore last
3884               // character
3885               currentPosition--;
3886             }
3887           } else { // has read \OctalDigit --> ignore last character
3888             currentPosition--;
3889           }
3890           if (number > 255)
3891             throw new InvalidInputException(INVALID_ESCAPE);
3892           currentCharacter = (char) number;
3893         } else
3894           throw new InvalidInputException(INVALID_ESCAPE);
3895     }
3896   }
3897   // chech presence of task: tags
3898   public void checkTaskTag(int commentStart, int commentEnd) {
3899     // only look for newer task: tags
3900     if (this.foundTaskCount > 0
3901         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3902       return;
3903     }
3904     int foundTaskIndex = this.foundTaskCount;
3905     nextChar : for (int i = commentStart; i < commentEnd
3906         && i < this.eofPosition; i++) {
3907       char[] tag = null;
3908       char[] priority = null;
3909       // check for tag occurrence
3910       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3911         tag = this.taskTags[itag];
3912         priority = this.taskPriorities != null
3913             && itag < this.taskPriorities.length
3914             ? this.taskPriorities[itag]
3915             : null;
3916         int tagLength = tag.length;
3917         for (int t = 0; t < tagLength; t++) {
3918           if (this.source[i + t] != tag[t])
3919             continue nextTag;
3920         }
3921         if (this.foundTaskTags == null) {
3922           this.foundTaskTags = new char[5][];
3923           this.foundTaskMessages = new char[5][];
3924           this.foundTaskPriorities = new char[5][];
3925           this.foundTaskPositions = new int[5][];
3926         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3927           System.arraycopy(this.foundTaskTags, 0,
3928               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3929               this.foundTaskCount);
3930           System.arraycopy(this.foundTaskMessages, 0,
3931               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3932               this.foundTaskCount);
3933           System.arraycopy(this.foundTaskPriorities, 0,
3934               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3935               0, this.foundTaskCount);
3936           System.arraycopy(this.foundTaskPositions, 0,
3937               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3938               this.foundTaskCount);
3939         }
3940         this.foundTaskTags[this.foundTaskCount] = tag;
3941         this.foundTaskPriorities[this.foundTaskCount] = priority;
3942         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3943             i + tagLength - 1};
3944         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3945         this.foundTaskCount++;
3946         i += tagLength - 1; // will be incremented when looping
3947       }
3948     }
3949     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3950       // retrieve message start and end positions
3951       int msgStart = this.foundTaskPositions[i][0]
3952           + this.foundTaskTags[i].length;
3953       int max_value = i + 1 < this.foundTaskCount
3954           ? this.foundTaskPositions[i + 1][0] - 1
3955           : commentEnd - 1;
3956       // at most beginning of next task
3957       if (max_value < msgStart)
3958         max_value = msgStart; // would only occur if tag is before EOF.
3959       int end = -1;
3960       char c;
3961       for (int j = msgStart; j < max_value; j++) {
3962         if ((c = this.source[j]) == '\n' || c == '\r') {
3963           end = j - 1;
3964           break;
3965         }
3966       }
3967       if (end == -1) {
3968         for (int j = max_value; j > msgStart; j--) {
3969           if ((c = this.source[j]) == '*') {
3970             end = j - 1;
3971             break;
3972           }
3973         }
3974         if (end == -1)
3975           end = max_value;
3976       }
3977       if (msgStart == end)
3978         continue; // empty
3979       // trim the message
3980       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3981         end--;
3982       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3983         msgStart++;
3984       // update the end position of the task
3985       this.foundTaskPositions[i][1] = end;
3986       // get the message source
3987       final int messageLength = end - msgStart + 1;
3988       char[] message = new char[messageLength];
3989       System.arraycopy(source, msgStart, message, 0, messageLength);
3990       this.foundTaskMessages[i] = message;
3991     }
3992   }
3993 }