net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12 import java.util.ArrayList;
  13 import java.util.Iterator;
  14 import java.util.List;
  15 import java.util.Stack;
  16 import net.sourceforge.phpdt.core.compiler.CharOperation;
  17 import net.sourceforge.phpdt.core.compiler.IScanner;
  18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  21 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The APIs are:
   * - getNextToken(), which returns the type of the current token (this value
   *   is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e.
   *   all unicode escapes have been transformed into the correct char);
   * - sourceStart, which gives the start position in the stream;
   * - currentPosition-1, which gives the sourceEnd position in the stream.
   */
  // 1.4 feature (assert keyword handling)
  private boolean assertMode;
  public boolean useAssertAsAnIndentifier = false;
  // flag indicating if the processed source contains occurrences of the
  // keyword assert
  public boolean containsAssertKeyword = false;
  public boolean recordLineSeparator;
  // NOTE(review): presumably true while the scanner is inside PHP code rather
  // than surrounding markup - confirm against getNextToken()
  public boolean phpMode = false;
  public Stack encapsedStringStack = null;
  // the character most recently read by the getNextChar...() methods
  public char currentCharacter;
  // start of the current token (see getCurrentTokenStartPosition())
  public int startPosition;
  // index of the next character to read; currentPosition - 1 is the end of
  // the current token (see getCurrentTokenEndPosition())
  public int currentPosition;
  // initialPosition: answered by getLineStart(1)
  // eofPosition: after this position EOF is generated instead of real tokens
  // from the source
  public int initialPosition, eofPosition;
  public boolean tokenizeComments;
  public boolean tokenizeWhiteSpace;
  // source should be viewed as a window (aka a part)
  // of an entire, very large stream
  public char source[];
  // unicode support
  public char[] withoutUnicodeBuffer;
  // when == 0 ==> no unicode in the current token
  public int withoutUnicodePtr;
  public boolean unicodeAsBackSlash = false;
  public boolean scanningFloatLiteral = false;
  // support for /** comments
  //public char[][] comments = new char[10][];
  public int[] commentStops = new int[10];
  public int[] commentStarts = new int[10];
  public int commentPtr = -1; // no comment test with commentPtr value -1
  // diet parsing support - jump over some method bodies when requested
  public boolean diet = false;
  // support for the poor-line-debuggers ....
  // remember the position of the cr/lf; read by getLineEnd()/getLineStart()
  public int[] lineEnds = new int[250];
  public int linePtr = -1;
  public boolean wasAcr = false;
  // message keys describing the scanning problems
  public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  //----------------optimized identifier management------------------
  // shared one-character arrays, one per lower-case ASCII letter
  static final char[] charArray_a = new char[]{'a'},
      charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
      charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
      charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
      charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
      charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
      charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
      charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
      charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
      charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
      charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
      charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
      charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
      charArray_z = new char[]{'z'};
  // placeholder stored in every charArray_length slot by the instance
  // initializer
  static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
      '\u0000', '\u0000', '\u0000'};
  static final int TableSize = 30, InternalTableSize = 6;
  //30*6 = 180 entries
  public static final int OptimizedLength = 6;
  // per-instance table indexed [OptimizedLength][TableSize][InternalTableSize]
  public/* static */
  final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
  // support for detecting non-externalized string literals
  int currentLineNr = -1;
  int previousLineNr = -1;
  NLSLine currentLine = null;
  List lines = new ArrayList();
  public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
  public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
  public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
  public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
  public StringLiteral[] nonNLSStrings = null;
  public boolean checkNonExternalizedStringLiterals = true;
  public boolean wasNonExternalizedStringLiteral = false;
 112   /* static */{
 113     for (int i = 0; i < 6; i++) {
 114       for (int j = 0; j < TableSize; j++) {
 115         for (int k = 0; k < InternalTableSize; k++) {
 116           charArray_length[i][j][k] = initCharArray;
 117         }
 118       }
 119     }
 120   }
  // NOTE(review): presumably the next slot to overwrite in the optimized
  // token-source tables for lengths 2..6 - confirm against
  // optimizedCurrentTokenSource2..6()
  static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
      newEntry6 = 0;
  // bracket kinds; BracketKinds is the number of kinds
  public static final int RoundBracket = 0;
  public static final int SquareBracket = 1;
  public static final int CurlyBracket = 2;
  public static final int BracketKinds = 3;
  // task tag support
  public char[][] foundTaskTags = null;
  public char[][] foundTaskMessages;
  public char[][] foundTaskPriorities = null;
  public int[][] foundTaskPositions;
  public int foundTaskCount = 0;
  public char[][] taskTags = null;
  public char[][] taskPriorities = null;
  // compile-time debugging switches
  public static final boolean DEBUG = false;
  public static final boolean TRACE = false;
  /**
   * Creates a scanner by delegating to the two-argument constructor with
   * both tokenizeComments and tokenizeWhiteSpace set to false.
   */
  public Scanner() {
    this(false, false);
  }
  /**
   * Creates a scanner by delegating to the three-argument constructor
   * (declared elsewhere in this class), passing false as its third argument.
   *
   * @param tokenizeComments
   *          forwarded unchanged as the first argument
   * @param tokenizeWhiteSpace
   *          forwarded unchanged as the second argument
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
  }
 143   /**
 144    * Determines if the specified character is permissible as the first
 145    * character in a PHP identifier
 146    */
 147   public static boolean isPHPIdentifierStart(char ch) {
 148     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 149   }
 150   /**
 151    * Determines if the specified character may be part of a PHP identifier as
 152    * other than the first character
 153    */
 154   public static boolean isPHPIdentifierPart(char ch) {
 155     return Character.isLetterOrDigit(ch) || (ch == '_')
 156         || (0x7F <= ch && ch <= 0xFF);
 157   }
 158   public final boolean atEnd() {
 159     // This code is not relevant if source is
 160     // Only a part of the real stream input
 161     return source.length == currentPosition;
 162   }
 163   public char[] getCurrentIdentifierSource() {
 164     //return the token REAL source (aka unicodes are precomputed)
 165     char[] result;
 166     //    if (withoutUnicodePtr != 0)
 167     //      //0 is used as a fast test flag so the real first char is in position 1
 168     //      System.arraycopy(
 169     //        withoutUnicodeBuffer,
 170     //        1,
 171     //        result = new char[withoutUnicodePtr],
 172     //        0,
 173     //        withoutUnicodePtr);
 174     //    else {
 175     int length = currentPosition - startPosition;
 176     switch (length) { // see OptimizedLength
 177       case 1 :
 178         return optimizedCurrentTokenSource1();
 179       case 2 :
 180         return optimizedCurrentTokenSource2();
 181       case 3 :
 182         return optimizedCurrentTokenSource3();
 183       case 4 :
 184         return optimizedCurrentTokenSource4();
 185       case 5 :
 186         return optimizedCurrentTokenSource5();
 187       case 6 :
 188         return optimizedCurrentTokenSource6();
 189     }
 190     //no optimization
 191     System.arraycopy(source, startPosition, result = new char[length], 0,
 192         length);
 193     //   }
 194     return result;
 195   }
 196   public int getCurrentTokenEndPosition() {
 197     return this.currentPosition - 1;
 198   }
 199   public final char[] getCurrentTokenSource() {
 200     // Return the token REAL source (aka unicodes are precomputed)
 201     char[] result;
 202     //    if (withoutUnicodePtr != 0)
 203     //      // 0 is used as a fast test flag so the real first char is in position 1
 204     //      System.arraycopy(
 205     //        withoutUnicodeBuffer,
 206     //        1,
 207     //        result = new char[withoutUnicodePtr],
 208     //        0,
 209     //        withoutUnicodePtr);
 210     //    else {
 211     int length;
 212     System.arraycopy(source, startPosition,
 213         result = new char[length = currentPosition - startPosition], 0, length);
 214     //    }
 215     return result;
 216   }
 217   public final char[] getCurrentTokenSource(int startPos) {
 218     // Return the token REAL source (aka unicodes are precomputed)
 219     char[] result;
 220     //    if (withoutUnicodePtr != 0)
 221     //      // 0 is used as a fast test flag so the real first char is in position 1
 222     //      System.arraycopy(
 223     //        withoutUnicodeBuffer,
 224     //        1,
 225     //        result = new char[withoutUnicodePtr],
 226     //        0,
 227     //        withoutUnicodePtr);
 228     //    else {
 229     int length;
 230     System.arraycopy(source, startPos,
 231         result = new char[length = currentPosition - startPos], 0, length);
 232     //  }
 233     return result;
 234   }
 235   public final char[] getCurrentTokenSourceString() {
 236     //return the token REAL source (aka unicodes are precomputed).
 237     //REMOVE the two " that are at the beginning and the end.
 238     char[] result;
 239     if (withoutUnicodePtr != 0)
 240       //0 is used as a fast test flag so the real first char is in position 1
 241       System.arraycopy(withoutUnicodeBuffer, 2,
 242       //2 is 1 (real start) + 1 (to jump over the ")
 243           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 244     else {
 245       int length;
 246       System.arraycopy(source, startPosition + 1,
 247           result = new char[length = currentPosition - startPosition - 2], 0,
 248           length);
 249     }
 250     return result;
 251   }
 252   public int getCurrentTokenStartPosition() {
 253     return this.startPosition;
 254   }
 255   public final char[] getCurrentStringLiteralSource() {
 256     // Return the token REAL source (aka unicodes are precomputed)
 257     char[] result;
 258     int length;
 259     System.arraycopy(source, startPosition + 1,
 260         result = new char[length = currentPosition - startPosition - 2], 0,
 261         length);
 262     //    }
 263     return result;
 264   }
 265   /*
 266    * Search the source position corresponding to the end of a given line number
 267    *
 268    * Line numbers are 1-based, and relative to the scanner initialPosition.
 269    * Character positions are 0-based.
 270    *
 271    * In case the given line number is inconsistent, answers -1.
 272    */
 273   public final int getLineEnd(int lineNumber) {
 274     if (lineEnds == null)
 275       return -1;
 276     if (lineNumber >= lineEnds.length)
 277       return -1;
 278     if (lineNumber <= 0)
 279       return -1;
 280     if (lineNumber == lineEnds.length - 1)
 281       return eofPosition;
 282     return lineEnds[lineNumber - 1];
 283     // next line start one character behind the lineEnd of the previous line
 284   }
 285   /**
 286    * Search the source position corresponding to the beginning of a given line
 287    * number
 288    *
 289    * Line numbers are 1-based, and relative to the scanner initialPosition.
 290    * Character positions are 0-based.
 291    *
 292    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 293    *
 294    * In case the given line number is inconsistent, answers -1.
 295    */
 296   public final int getLineStart(int lineNumber) {
 297     if (lineEnds == null)
 298       return -1;
 299     if (lineNumber >= lineEnds.length)
 300       return -1;
 301     if (lineNumber <= 0)
 302       return -1;
 303     if (lineNumber == 1)
 304       return initialPosition;
 305     return lineEnds[lineNumber - 2] + 1;
 306     // next line start one character behind the lineEnd of the previous line
 307   }
 308   public final boolean getNextChar(char testedChar) {
 309     //BOOLEAN
 310     //handle the case of unicode.
 311     //when a unicode appears then we must use a buffer that holds char
 312     // internal values
 313     //At the end of this method currentCharacter holds the new visited char
 314     //and currentPosition points right next after it
 315     //Both previous lines are true if the currentCharacter is == to the
 316     // testedChar
 317     //On false, no side effect has occured.
 318     //ALL getNextChar.... ARE OPTIMIZED COPIES
 319     int temp = currentPosition;
 320     try {
 321       currentCharacter = source[currentPosition++];
 322       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 323       //        && (source[currentPosition] == 'u')) {
 324       //        //-------------unicode traitement ------------
 325       //        int c1, c2, c3, c4;
 326       //        int unicodeSize = 6;
 327       //        currentPosition++;
 328       //        while (source[currentPosition] == 'u') {
 329       //          currentPosition++;
 330       //          unicodeSize++;
 331       //        }
 332       //
 333       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 334       //          || c1 < 0)
 335       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 336       //            || c2 < 0)
 337       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 338       //            || c3 < 0)
 339       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 340       //            || c4 < 0)) {
 341       //          currentPosition = temp;
 342       //          return false;
 343       //        }
 344       //
 345       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 346       //        if (currentCharacter != testedChar) {
 347       //          currentPosition = temp;
 348       //          return false;
 349       //        }
 350       //        unicodeAsBackSlash = currentCharacter == '\\';
 351       //
 352       //        //need the unicode buffer
 353       //        if (withoutUnicodePtr == 0) {
 354       //          //buffer all the entries that have been left aside....
 355       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 356       //          System.arraycopy(
 357       //            source,
 358       //            startPosition,
 359       //            withoutUnicodeBuffer,
 360       //            1,
 361       //            withoutUnicodePtr);
 362       //        }
 363       //        //fill the buffer with the char
 364       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 365       //        return true;
 366       //
 367       //      } //-------------end unicode traitement--------------
 368       //      else {
 369       if (currentCharacter != testedChar) {
 370         currentPosition = temp;
 371         return false;
 372       }
 373       unicodeAsBackSlash = false;
 374       //        if (withoutUnicodePtr != 0)
 375       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 376       return true;
 377       //      }
 378     } catch (IndexOutOfBoundsException e) {
 379       unicodeAsBackSlash = false;
 380       currentPosition = temp;
 381       return false;
 382     }
 383   }
 384   public final int getNextChar(char testedChar1, char testedChar2) {
 385     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 386     //test can be done with (x==0) for the first and (x>0) for the second
 387     //handle the case of unicode.
 388     //when a unicode appears then we must use a buffer that holds char
 389     // internal values
 390     //At the end of this method currentCharacter holds the new visited char
 391     //and currentPosition points right next after it
 392     //Both previous lines are true if the currentCharacter is == to the
 393     // testedChar1/2
 394     //On false, no side effect has occured.
 395     //ALL getNextChar.... ARE OPTIMIZED COPIES
 396     int temp = currentPosition;
 397     try {
 398       int result;
 399       currentCharacter = source[currentPosition++];
 400       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 401       //        && (source[currentPosition] == 'u')) {
 402       //        //-------------unicode traitement ------------
 403       //        int c1, c2, c3, c4;
 404       //        int unicodeSize = 6;
 405       //        currentPosition++;
 406       //        while (source[currentPosition] == 'u') {
 407       //          currentPosition++;
 408       //          unicodeSize++;
 409       //        }
 410       //
 411       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 412       //          || c1 < 0)
 413       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 414       //            || c2 < 0)
 415       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 416       //            || c3 < 0)
 417       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 418       //            || c4 < 0)) {
 419       //          currentPosition = temp;
 420       //          return 2;
 421       //        }
 422       //
 423       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 424       //        if (currentCharacter == testedChar1)
 425       //          result = 0;
 426       //        else if (currentCharacter == testedChar2)
 427       //          result = 1;
 428       //        else {
 429       //          currentPosition = temp;
 430       //          return -1;
 431       //        }
 432       //
 433       //        //need the unicode buffer
 434       //        if (withoutUnicodePtr == 0) {
 435       //          //buffer all the entries that have been left aside....
 436       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 437       //          System.arraycopy(
 438       //            source,
 439       //            startPosition,
 440       //            withoutUnicodeBuffer,
 441       //            1,
 442       //            withoutUnicodePtr);
 443       //        }
 444       //        //fill the buffer with the char
 445       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 446       //        return result;
 447       //      } //-------------end unicode traitement--------------
 448       //      else {
 449       if (currentCharacter == testedChar1)
 450         result = 0;
 451       else if (currentCharacter == testedChar2)
 452         result = 1;
 453       else {
 454         currentPosition = temp;
 455         return -1;
 456       }
 457       //        if (withoutUnicodePtr != 0)
 458       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 459       return result;
 460       //     }
 461     } catch (IndexOutOfBoundsException e) {
 462       currentPosition = temp;
 463       return -1;
 464     }
 465   }
 466   public final boolean getNextCharAsDigit() {
 467     //BOOLEAN
 468     //handle the case of unicode.
 469     //when a unicode appears then we must use a buffer that holds char
 470     // internal values
 471     //At the end of this method currentCharacter holds the new visited char
 472     //and currentPosition points right next after it
 473     //Both previous lines are true if the currentCharacter is a digit
 474     //On false, no side effect has occured.
 475     //ALL getNextChar.... ARE OPTIMIZED COPIES
 476     int temp = currentPosition;
 477     try {
 478       currentCharacter = source[currentPosition++];
 479       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 480       //        && (source[currentPosition] == 'u')) {
 481       //        //-------------unicode traitement ------------
 482       //        int c1, c2, c3, c4;
 483       //        int unicodeSize = 6;
 484       //        currentPosition++;
 485       //        while (source[currentPosition] == 'u') {
 486       //          currentPosition++;
 487       //          unicodeSize++;
 488       //        }
 489       //
 490       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 491       //          || c1 < 0)
 492       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 493       //            || c2 < 0)
 494       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 495       //            || c3 < 0)
 496       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 497       //            || c4 < 0)) {
 498       //          currentPosition = temp;
 499       //          return false;
 500       //        }
 501       //
 502       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 503       //        if (!Character.isDigit(currentCharacter)) {
 504       //          currentPosition = temp;
 505       //          return false;
 506       //        }
 507       //
 508       //        //need the unicode buffer
 509       //        if (withoutUnicodePtr == 0) {
 510       //          //buffer all the entries that have been left aside....
 511       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 512       //          System.arraycopy(
 513       //            source,
 514       //            startPosition,
 515       //            withoutUnicodeBuffer,
 516       //            1,
 517       //            withoutUnicodePtr);
 518       //        }
 519       //        //fill the buffer with the char
 520       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 521       //        return true;
 522       //      } //-------------end unicode traitement--------------
 523       //      else {
 524       if (!Character.isDigit(currentCharacter)) {
 525         currentPosition = temp;
 526         return false;
 527       }
 528       //        if (withoutUnicodePtr != 0)
 529       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 530       return true;
 531       //      }
 532     } catch (IndexOutOfBoundsException e) {
 533       currentPosition = temp;
 534       return false;
 535     }
 536   }
 537   public final boolean getNextCharAsDigit(int radix) {
 538     //BOOLEAN
 539     //handle the case of unicode.
 540     //when a unicode appears then we must use a buffer that holds char
 541     // internal values
 542     //At the end of this method currentCharacter holds the new visited char
 543     //and currentPosition points right next after it
 544     //Both previous lines are true if the currentCharacter is a digit base on
 545     // radix
 546     //On false, no side effect has occured.
 547     //ALL getNextChar.... ARE OPTIMIZED COPIES
 548     int temp = currentPosition;
 549     try {
 550       currentCharacter = source[currentPosition++];
 551       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 552       //        && (source[currentPosition] == 'u')) {
 553       //        //-------------unicode traitement ------------
 554       //        int c1, c2, c3, c4;
 555       //        int unicodeSize = 6;
 556       //        currentPosition++;
 557       //        while (source[currentPosition] == 'u') {
 558       //          currentPosition++;
 559       //          unicodeSize++;
 560       //        }
 561       //
 562       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 563       //          || c1 < 0)
 564       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //            || c2 < 0)
 566       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 567       //            || c3 < 0)
 568       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 569       //            || c4 < 0)) {
 570       //          currentPosition = temp;
 571       //          return false;
 572       //        }
 573       //
 574       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 575       //        if (Character.digit(currentCharacter, radix) == -1) {
 576       //          currentPosition = temp;
 577       //          return false;
 578       //        }
 579       //
 580       //        //need the unicode buffer
 581       //        if (withoutUnicodePtr == 0) {
 582       //          //buffer all the entries that have been left aside....
 583       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 584       //          System.arraycopy(
 585       //            source,
 586       //            startPosition,
 587       //            withoutUnicodeBuffer,
 588       //            1,
 589       //            withoutUnicodePtr);
 590       //        }
 591       //        //fill the buffer with the char
 592       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 593       //        return true;
 594       //      } //-------------end unicode traitement--------------
 595       //      else {
 596       if (Character.digit(currentCharacter, radix) == -1) {
 597         currentPosition = temp;
 598         return false;
 599       }
 600       //        if (withoutUnicodePtr != 0)
 601       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 602       return true;
 603       //      }
 604     } catch (IndexOutOfBoundsException e) {
 605       currentPosition = temp;
 606       return false;
 607     }
 608   }
 609   public boolean getNextCharAsJavaIdentifierPart() {
 610     //BOOLEAN
 611     //handle the case of unicode.
 612     //when a unicode appears then we must use a buffer that holds char
 613     // internal values
 614     //At the end of this method currentCharacter holds the new visited char
 615     //and currentPosition points right next after it
 616     //Both previous lines are true if the currentCharacter is a
 617     // JavaIdentifierPart
 618     //On false, no side effect has occured.
 619     //ALL getNextChar.... ARE OPTIMIZED COPIES
 620     int temp = currentPosition;
 621     try {
 622       currentCharacter = source[currentPosition++];
 623       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 624       //        && (source[currentPosition] == 'u')) {
 625       //        //-------------unicode traitement ------------
 626       //        int c1, c2, c3, c4;
 627       //        int unicodeSize = 6;
 628       //        currentPosition++;
 629       //        while (source[currentPosition] == 'u') {
 630       //          currentPosition++;
 631       //          unicodeSize++;
 632       //        }
 633       //
 634       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 635       //          || c1 < 0)
 636       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 637       //            || c2 < 0)
 638       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 639       //            || c3 < 0)
 640       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 641       //            || c4 < 0)) {
 642       //          currentPosition = temp;
 643       //          return false;
 644       //        }
 645       //
 646       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 647       //        if (!isPHPIdentifierPart(currentCharacter)) {
 648       //          currentPosition = temp;
 649       //          return false;
 650       //        }
 651       //
 652       //        //need the unicode buffer
 653       //        if (withoutUnicodePtr == 0) {
 654       //          //buffer all the entries that have been left aside....
 655       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 656       //          System.arraycopy(
 657       //            source,
 658       //            startPosition,
 659       //            withoutUnicodeBuffer,
 660       //            1,
 661       //            withoutUnicodePtr);
 662       //        }
 663       //        //fill the buffer with the char
 664       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 665       //        return true;
 666       //      } //-------------end unicode traitement--------------
 667       //      else {
 668       if (!isPHPIdentifierPart(currentCharacter)) {
 669         currentPosition = temp;
 670         return false;
 671       }
 672       //        if (withoutUnicodePtr != 0)
 673       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 674       return true;
 675       //      }
 676     } catch (IndexOutOfBoundsException e) {
 677       currentPosition = temp;
 678       return false;
 679     }
 680   }
 681   public int getCastOrParen() {
 682     int tempPosition = currentPosition;
 683     char tempCharacter = currentCharacter;
 684     int tempToken = TokenNameLPAREN;
 685     boolean found = false;
 686     StringBuffer buf = new StringBuffer();
 687     try {
 688       do {
 689         currentCharacter = source[currentPosition++];
 690       } while (currentCharacter == ' ' || currentCharacter == '\t');
 691       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
 692           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 693         buf.append(currentCharacter);
 694         currentCharacter = source[currentPosition++];
 695       }
 696       if (buf.length() >= 3 && buf.length() <= 7) {
 697         char[] data = buf.toString().toCharArray();
 698         int index = 0;
 699         switch (data.length) {
 700           case 3 :
 701             // int
 702             if ((data[index] == 'i') && (data[++index] == 'n')
 703                 && (data[++index] == 't')) {
 704               found = true;
 705               tempToken = TokenNameintCAST;
 706             }
 707             break;
 708           case 4 :
 709             // bool real
 710             if ((data[index] == 'b') && (data[++index] == 'o')
 711                 && (data[++index] == 'o') && (data[++index] == 'l')) {
 712               found = true;
 713               tempToken = TokenNameboolCAST;
 714             } else {
 715               index = 0;
 716               if ((data[index] == 'r') && (data[++index] == 'e')
 717                   && (data[++index] == 'a') && (data[++index] == 'l')) {
 718                 found = true;
 719                 tempToken = TokenNamedoubleCAST;
 720               }
 721             }
 722             break;
 723           case 5 :
 724             // array unset float
 725             if ((data[index] == 'a') && (data[++index] == 'r')
 726                 && (data[++index] == 'r') && (data[++index] == 'a')
 727                 && (data[++index] == 'y')) {
 728               found = true;
 729               tempToken = TokenNamearrayCAST;
 730             } else {
 731               index = 0;
 732               if ((data[index] == 'u') && (data[++index] == 'n')
 733                   && (data[++index] == 's') && (data[++index] == 'e')
 734                   && (data[++index] == 't')) {
 735                 found = true;
 736                 tempToken = TokenNameunsetCAST;
 737               } else {
 738                 index = 0;
 739                 if ((data[index] == 'f') && (data[++index] == 'l')
 740                     && (data[++index] == 'o') && (data[++index] == 'a')
 741                     && (data[++index] == 't')) {
 742                   found = true;
 743                   tempToken = TokenNamedoubleCAST;
 744                 }
 745               }
 746             }
 747             break;
 748           case 6 :
 749             // object string double
 750             if ((data[index] == 'o') && (data[++index] == 'b')
 751                 && (data[++index] == 'j') && (data[++index] == 'e')
 752                 && (data[++index] == 'c') && (data[++index] == 't')) {
 753               found = true;
 754               tempToken = TokenNameobjectCAST;
 755             } else {
 756               index = 0;
 757               if ((data[index] == 's') && (data[++index] == 't')
 758                   && (data[++index] == 'r') && (data[++index] == 'i')
 759                   && (data[++index] == 'n') && (data[++index] == 'g')) {
 760                 found = true;
 761                 tempToken = TokenNamestringCAST;
 762               } else {
 763                 index = 0;
 764                 if ((data[index] == 'd') && (data[++index] == 'o')
 765                     && (data[++index] == 'u') && (data[++index] == 'b')
 766                     && (data[++index] == 'l') && (data[++index] == 'e')) {
 767                   found = true;
 768                   tempToken = TokenNamedoubleCAST;
 769                 }
 770               }
 771             }
 772             break;
 773           case 7 :
 774             // boolean integer
 775             if ((data[index] == 'b') && (data[++index] == 'o')
 776                 && (data[++index] == 'o') && (data[++index] == 'l')
 777                 && (data[++index] == 'e') && (data[++index] == 'a')
 778                 && (data[++index] == 'n')) {
 779               found = true;
 780               tempToken = TokenNameboolCAST;
 781             } else {
 782               index = 0;
 783               if ((data[index] == 'i') && (data[++index] == 'n')
 784                   && (data[++index] == 't') && (data[++index] == 'e')
 785                   && (data[++index] == 'g') && (data[++index] == 'e')
 786                   && (data[++index] == 'r')) {
 787                 found = true;
 788                 tempToken = TokenNameintCAST;
 789               }
 790             }
 791             break;
 792         }
 793         if (found) {
 794           while (currentCharacter == ' ' || currentCharacter == '\t') {
 795             currentCharacter = source[currentPosition++];
 796           }
 797           if (currentCharacter == ')') {
 798             return tempToken;
 799           }
 800         }
 801       }
 802     } catch (IndexOutOfBoundsException e) {
 803     }
 804     currentCharacter = tempCharacter;
 805     currentPosition = tempPosition;
 806     return TokenNameLPAREN;
 807   }
 808   public int getNextToken() throws InvalidInputException {
 809     if (!phpMode) {
 810       return getInlinedHTML(currentPosition);
 811     }
 812     if (phpMode) {
 813       this.wasAcr = false;
 814       if (diet) {
 815         jumpOverMethodBody();
 816         diet = false;
 817         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 818       }
 819       try {
 820         while (true) {
 821           withoutUnicodePtr = 0;
 822           //start with a new token
 823           char encapsedChar = ' ';
 824           if (!encapsedStringStack.isEmpty()) {
 825             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
 826           }
 827           if (encapsedChar != '$' && encapsedChar != ' ') {
 828             currentCharacter = source[currentPosition++];
 829             if (currentCharacter == encapsedChar) {
 830               switch (currentCharacter) {
 831                 case '`' :
 832                   return TokenNameEncapsedString0;
 833                 case '\'' :
 834                   return TokenNameEncapsedString1;
 835                 case '"' :
 836                   return TokenNameEncapsedString2;
 837               }
 838             }
 839             while (currentCharacter != encapsedChar) {
 840               /** ** in PHP \r and \n are valid in string literals *** */
 841               switch (currentCharacter) {
 842                 case '\\' :
 843                   int escapeSize = currentPosition;
 844                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 845                   //scanEscapeCharacter make a side effect on this value and
 846                   // we need the previous value few lines down this one
 847                   scanDoubleQuotedEscapeCharacter();
 848                   escapeSize = currentPosition - escapeSize;
 849                   if (withoutUnicodePtr == 0) {
 850                     //buffer all the entries that have been left aside....
 851                     withoutUnicodePtr = currentPosition - escapeSize - 1
 852                         - startPosition;
 853                     System.arraycopy(source, startPosition,
 854                         withoutUnicodeBuffer, 1, withoutUnicodePtr);
 855                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 856                   } else { //overwrite the / in the buffer
 857                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 858                     if (backSlashAsUnicodeInString) { //there are TWO \ in
 859                       withoutUnicodePtr--;
 860                     }
 861                   }
 862                   break;
 863                 case '$' :
 864                   if (isPHPIdentifierStart(source[currentPosition])
 865                       || source[currentPosition] == '{') {
 866                     currentPosition--;
 867                     encapsedStringStack.push(new Character('$'));
 868                     return TokenNameSTRING;
 869                   }
 870                   break;
 871                 case '{' :
 872                   if (source[currentPosition] == '$') { // CURLY_OPEN
 873                     currentPosition--;
 874                     encapsedStringStack.push(new Character('$'));
 875                     return TokenNameSTRING;
 876                   }
 877               }
 878               // consume next character
 879               unicodeAsBackSlash = false;
 880               currentCharacter = source[currentPosition++];
 881               if (withoutUnicodePtr != 0) {
 882                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 883               }
 884               //                  }
 885             } // end while
 886             currentPosition--;
 887             return TokenNameSTRING;
 888           }
 889           // ---------Consume white space and handles startPosition---------
 890           int whiteStart = currentPosition;
 891           startPosition = currentPosition;
 892           currentCharacter = source[currentPosition++];
 893           if (encapsedChar == '$') {
 894             switch (currentCharacter) {
 895               case '\\' :
 896                 currentCharacter = source[currentPosition++];
 897                 return TokenNameSTRING;
 898               case '{' :
 899                 if (encapsedChar == '$') {
 900                   if (getNextChar('$'))
 901                     return TokenNameCURLY_OPEN;
 902                 }
 903                 return TokenNameLBRACE;
 904               case '}' :
 905                 return TokenNameRBRACE;
 906               case '[' :
 907                 return TokenNameLBRACKET;
 908               case ']' :
 909                 return TokenNameRBRACKET;
 910               case '\'' :
 911                 return TokenNameEncapsedString1;
 912               case '"' :
 913                 return TokenNameEncapsedString2;
 914               case '`' :
 915                 return TokenNameEncapsedString0;
 916               case '-' :
 917                 if (getNextChar('>'))
 918                   return TokenNameMINUS_GREATER;
 919                 return TokenNameSTRING;
 920               default :
 921                 if (currentCharacter == '$') {
 922                   int oldPosition = currentPosition;
 923                   try {
 924                     currentCharacter = source[currentPosition++];
 925                     if (currentCharacter == '{') {
 926                       return TokenNameDOLLAR_LBRACE;
 927                     }
 928                     if (isPHPIdentifierStart(currentCharacter)) {
 929                       return scanIdentifierOrKeyword(true);
 930                     } else {
 931                       currentPosition = oldPosition;
 932                       return TokenNameSTRING;
 933                     }
 934                   } catch (IndexOutOfBoundsException e) {
 935                     currentPosition = oldPosition;
 936                     return TokenNameSTRING;
 937                   }
 938                 }
 939                 if (isPHPIdentifierStart(currentCharacter))
 940                   return scanIdentifierOrKeyword(false);
 941                 if (Character.isDigit(currentCharacter))
 942                   return scanNumber(false);
 943                 return TokenNameERROR;
 944             }
 945           }
 946 //          boolean isWhiteSpace;
 947           while ((currentCharacter == ' ')
 948                   || Character.isWhitespace(currentCharacter)) {
 949             startPosition = currentPosition;
 950             currentCharacter = source[currentPosition++];
 951             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 952             //              && (source[currentPosition] == 'u')) {
 953             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 954             //            } else {
 955             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 956               checkNonExternalizeString();
 957               if (recordLineSeparator) {
 958                 pushLineSeparator();
 959               } else {
 960                 currentLine = null;
 961               }
 962             }
 963 //            isWhiteSpace = (currentCharacter == ' ')
 964 //                || Character.isWhitespace(currentCharacter);
 965             //            }
 966           }
 967           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 968             // reposition scanner in case we are interested by spaces as tokens
 969             currentPosition--;
 970             startPosition = whiteStart;
 971             return TokenNameWHITESPACE;
 972           }
 973           //little trick to get out in the middle of a source compuation
 974           if (currentPosition > eofPosition)
 975             return TokenNameEOF;
 976
 977             // ---------Identify the next token-------------
 978             switch (currentCharacter) {
 979               case '(' :
 980                 return getCastOrParen();
 981               case ')' :
 982                 return TokenNameRPAREN;
 983               case '{' :
 984                 return TokenNameLBRACE;
 985               case '}' :
 986                 return TokenNameRBRACE;
 987               case '[' :
 988                 return TokenNameLBRACKET;
 989               case ']' :
 990                 return TokenNameRBRACKET;
 991               case ';' :
 992                 return TokenNameSEMICOLON;
 993               case ',' :
 994                 return TokenNameCOMMA;
 995               case '.' :
 996                 if (getNextChar('='))
 997                   return TokenNameDOT_EQUAL;
 998                 if (getNextCharAsDigit())
 999                   return scanNumber(true);
1000                 return TokenNameDOT;
1001               case '+' :
1002                 {
1003                   int test;
1004                   if ((test = getNextChar('+', '=')) == 0)
1005                     return TokenNamePLUS_PLUS;
1006                   if (test > 0)
1007                     return TokenNamePLUS_EQUAL;
1008                   return TokenNamePLUS;
1009                 }
1010               case '-' :
1011               {
1012                   int test;
1013                   if ((test = getNextChar('-', '=')) == 0)
1014                     return TokenNameMINUS_MINUS;
1015                   if (test > 0)
1016                     return TokenNameMINUS_EQUAL;
1017                   if (getNextChar('>'))
1018                     return TokenNameMINUS_GREATER;
1019                   return TokenNameMINUS;
1020               }
1021               case '~' :
1022                 if (getNextChar('='))
1023                   return TokenNameTWIDDLE_EQUAL;
1024                 return TokenNameTWIDDLE;
1025               case '!' :
1026                 if (getNextChar('=')) {
1027                   if (getNextChar('=')) {
1028                     return TokenNameNOT_EQUAL_EQUAL;
1029                   }
1030                   return TokenNameNOT_EQUAL;
1031                 }
1032                 return TokenNameNOT;
1033               case '*' :
1034                 if (getNextChar('='))
1035                   return TokenNameMULTIPLY_EQUAL;
1036                 return TokenNameMULTIPLY;
1037               case '%' :
1038                 if (getNextChar('='))
1039                   return TokenNameREMAINDER_EQUAL;
1040                 return TokenNameREMAINDER;
1041               case '<' :
1042                 {
1043                   int oldPosition = currentPosition;
1044                   try {
1045                     currentCharacter = source[currentPosition++];
1046                   } catch (IndexOutOfBoundsException e) {
1047                     currentPosition = oldPosition;
1048                     return TokenNameLESS;
1049                   }
1050                   switch (currentCharacter) {
1051                     case '=' :
1052                       return TokenNameLESS_EQUAL;
1053                     case '>' :
1054                       return TokenNameNOT_EQUAL;
1055                     case '<' :
1056                       if (getNextChar('='))
1057                         return TokenNameLEFT_SHIFT_EQUAL;
1058                       if (getNextChar('<')) {
1059                         currentCharacter = source[currentPosition++];
1060                         while (Character.isWhitespace(currentCharacter)) {
1061                           currentCharacter = source[currentPosition++];
1062                         }
1063                         int heredocStart = currentPosition - 1;
1064                         int heredocLength = 0;
1065                         if (isPHPIdentifierStart(currentCharacter)) {
1066                           currentCharacter = source[currentPosition++];
1067                         } else {
1068                           return TokenNameERROR;
1069                         }
1070                         while (isPHPIdentifierPart(currentCharacter)) {
1071                           currentCharacter = source[currentPosition++];
1072                         }
1073                         heredocLength = currentPosition - heredocStart - 1;
1074                         // heredoc end-tag determination
1075                         boolean endTag = true;
1076                         char ch;
1077                         do {
1078                           ch = source[currentPosition++];
1079                           if (ch == '\r' || ch == '\n') {
1080                             if (recordLineSeparator) {
1081                               pushLineSeparator();
1082                             } else {
1083                               currentLine = null;
1084                             }
1085                             for (int i = 0; i < heredocLength; i++) {
1086                               if (source[currentPosition + i] != source[heredocStart
1087                                   + i]) {
1088                                 endTag = false;
1089                                 break;
1090                               }
1091                             }
1092                             if (endTag) {
1093                               currentPosition += heredocLength - 1;
1094                               currentCharacter = source[currentPosition++];
1095                               break; // do...while loop
1096                             } else {
1097                               endTag = true;
1098                             }
1099                           }
1100                         } while (true);
1101                         return TokenNameHEREDOC;
1102                       }
1103                       return TokenNameLEFT_SHIFT;
1104                   }
1105                   currentPosition = oldPosition;
1106                   return TokenNameLESS;
1107                 }
1108               case '>' :
1109                 {
1110                   int test;
1111                   if ((test = getNextChar('=', '>')) == 0)
1112                     return TokenNameGREATER_EQUAL;
1113                   if (test > 0) {
1114                     if ((test = getNextChar('=', '>')) == 0)
1115                       return TokenNameRIGHT_SHIFT_EQUAL;
1116                     return TokenNameRIGHT_SHIFT;
1117                   }
1118                   return TokenNameGREATER;
1119                 }
1120               case '=' :
1121                 if (getNextChar('=')) {
1122                   if (getNextChar('=')) {
1123                     return TokenNameEQUAL_EQUAL_EQUAL;
1124                   }
1125                   return TokenNameEQUAL_EQUAL;
1126                 }
1127                 if (getNextChar('>'))
1128                   return TokenNameEQUAL_GREATER;
1129                 return TokenNameEQUAL;
1130               case '&' :
1131                 {
1132                   int test;
1133                   if ((test = getNextChar('&', '=')) == 0)
1134                     return TokenNameAND_AND;
1135                   if (test > 0)
1136                     return TokenNameAND_EQUAL;
1137                   return TokenNameAND;
1138                 }
1139               case '|' :
1140                 {
1141                   int test;
1142                   if ((test = getNextChar('|', '=')) == 0)
1143                     return TokenNameOR_OR;
1144                   if (test > 0)
1145                     return TokenNameOR_EQUAL;
1146                   return TokenNameOR;
1147                 }
1148               case '^' :
1149                 if (getNextChar('='))
1150                   return TokenNameXOR_EQUAL;
1151                 return TokenNameXOR;
1152               case '?' :
1153                 if (getNextChar('>')) {
1154                   phpMode = false;
1155                   if (currentPosition == source.length) {
1156                     phpMode = true;
1157                     return TokenNameINLINE_HTML;
1158                   }
1159                   return getInlinedHTML(currentPosition - 2);
1160                 }
1161                 return TokenNameQUESTION;
1162               case ':' :
1163                 if (getNextChar(':'))
1164                   return TokenNamePAAMAYIM_NEKUDOTAYIM;
1165                 return TokenNameCOLON;
1166               case '@' :
1167                 return TokenNameAT;
1168               case '\'' :
1169 //                return TokenNameEncapsedString1;
1170                 try {
1171                   // consume next character
1172                   unicodeAsBackSlash = false;
1173                   currentCharacter = source[currentPosition++];
1174                   //                if (((currentCharacter = source[currentPosition++]) == '\\')
1175                   //                  && (source[currentPosition] == 'u')) {
1176                   //                  getNextUnicodeChar();
1177                   //                } else {
1178                   //                  if (withoutUnicodePtr != 0) {
1179                   //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1180                   //                      currentCharacter;
1181                   //                  }
1182                   //                }
1183                   while (currentCharacter != '\'') {
1184                     /** ** in PHP \r and \n are valid in string literals *** */
1185                     //                  if ((currentCharacter == '\n')
1186                     //                    || (currentCharacter == '\r')) {
1187                     //                    // relocate if finding another quote fairly close: thus
1188                     // unicode '/u000D' will be fully consumed
1189                     //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1190                     //                      if (currentPosition + lookAhead == source.length)
1191                     //                        break;
1192                     //                      if (source[currentPosition + lookAhead] == '\n')
1193                     //                        break;
1194                     //                      if (source[currentPosition + lookAhead] == '\"') {
1195                     //                        currentPosition += lookAhead + 1;
1196                     //                        break;
1197                     //                      }
1198                     //                    }
1199                     //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1200                     //                  }
1201                     if (currentCharacter == '\\') {
1202                       int escapeSize = currentPosition;
1203                       boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1204                       //scanEscapeCharacter make a side effect on this value and
1205                       // we need the previous value few lines down this one
1206                       scanSingleQuotedEscapeCharacter();
1207                       escapeSize = currentPosition - escapeSize;
1208                       if (withoutUnicodePtr == 0) {
1209                         //buffer all the entries that have been left aside....
1210                         withoutUnicodePtr = currentPosition - escapeSize - 1
1211                             - startPosition;
1212                         System.arraycopy(source, startPosition,
1213                             withoutUnicodeBuffer, 1, withoutUnicodePtr);
1214                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1215                       } else { //overwrite the / in the buffer
1216                         withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1217                         if (backSlashAsUnicodeInString) { //there are TWO \ in
1218                           // the stream where
1219                           // only one is correct
1220                           withoutUnicodePtr--;
1221                         }
1222                       }
1223                     }
1224                     // consume next character
1225                     unicodeAsBackSlash = false;
1226                     currentCharacter = source[currentPosition++];
1227                     //                  if (((currentCharacter = source[currentPosition++]) ==
1228                     // '\\')
1229                     //                    && (source[currentPosition] == 'u')) {
1230                     //                    getNextUnicodeChar();
1231                     //                  } else {
1232                     if (withoutUnicodePtr != 0) {
1233                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1234                     }
1235                     //                  }
1236                   }
1237                 } catch (IndexOutOfBoundsException e) {
1238                   throw new InvalidInputException(UNTERMINATED_STRING);
1239                 } catch (InvalidInputException e) {
1240                   if (e.getMessage().equals(INVALID_ESCAPE)) {
1241                     // relocate if finding another quote fairly close: thus
1242                     // unicode '/u000D' will be fully consumed
1243                     for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1244                       if (currentPosition + lookAhead == source.length)
1245                         break;
1246                       if (source[currentPosition + lookAhead] == '\n')
1247                         break;
1248                       if (source[currentPosition + lookAhead] == '\'') {
1249                         currentPosition += lookAhead + 1;
1250                         break;
1251                       }
1252                     }
1253                   }
1254                   throw e; // rethrow
1255                 }
1256                 if (checkNonExternalizedStringLiterals) { // check for presence
1257                   // of NLS tags
1258                   // //$NON-NLS-?$ where
1259                   // ? is an int.
1260                   if (currentLine == null) {
1261                     currentLine = new NLSLine();
1262                     lines.add(currentLine);
1263                   }
1264                   currentLine.add(new StringLiteral(
1265                       getCurrentTokenSourceString(), startPosition,
1266                       currentPosition - 1));
1267                 }
1268                 return TokenNameStringConstant;
1269               case '"' :
1270                 return TokenNameEncapsedString2;
1271               case '`' :
1272                 return TokenNameEncapsedString0;
1273               case '#' :
1274               case '/' :
1275                 {
1276                   char startChar = currentCharacter;
1277                   if (getNextChar('=')) {
1278                     return TokenNameDIVIDE_EQUAL;
1279                   }
1280                   int test;
1281                   if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1282                     //line comment
1283                     int endPositionForLineComment = 0;
1284                     try { //get the next char
1285                       currentCharacter = source[currentPosition++];
1286                       //                    if (((currentCharacter = source[currentPosition++])
1287                       //                      == '\\')
1288                       //                      && (source[currentPosition] == 'u')) {
1289                       //                      //-------------unicode traitement ------------
1290                       //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1291                       //                      currentPosition++;
1292                       //                      while (source[currentPosition] == 'u') {
1293                       //                        currentPosition++;
1294                       //                      }
1295                       //                      if ((c1 =
1296                       //                        Character.getNumericValue(source[currentPosition++]))
1297                       //                        > 15
1298                       //                        || c1 < 0
1299                       //                        || (c2 =
1300                       //                          Character.getNumericValue(source[currentPosition++]))
1301                       //                          > 15
1302                       //                        || c2 < 0
1303                       //                        || (c3 =
1304                       //                          Character.getNumericValue(source[currentPosition++]))
1305                       //                          > 15
1306                       //                        || c3 < 0
1307                       //                        || (c4 =
1308                       //                          Character.getNumericValue(source[currentPosition++]))
1309                       //                          > 15
1310                       //                        || c4 < 0) {
1311                       //                        throw new
1312                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1313                       //                      } else {
1314                       //                        currentCharacter =
1315                       //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1316                       //                      }
1317                       //                    }
1318                       //handle the \\u case manually into comment
1319                       //                    if (currentCharacter == '\\') {
1320                       //                      if (source[currentPosition] == '\\')
1321                       //                        currentPosition++;
1322                       //                    } //jump over the \\
1323                       boolean isUnicode = false;
1324                       while (currentCharacter != '\r'
1325                           && currentCharacter != '\n') {
1326                         if (currentCharacter == '?') {
1327                           if (getNextChar('>')) {
1328                             startPosition = currentPosition - 2;
1329                             phpMode = false;
1330                             return TokenNameINLINE_HTML;
1331                           }
1332                         }
1333                         //get the next char
1334                         isUnicode = false;
1335                         currentCharacter = source[currentPosition++];
1336                         //                      if (((currentCharacter = source[currentPosition++])
1337                         //                        == '\\')
1338                         //                        && (source[currentPosition] == 'u')) {
1339                         //                        isUnicode = true;
1340                         //                        //-------------unicode traitement ------------
1341                         //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1342                         //                        currentPosition++;
1343                         //                        while (source[currentPosition] == 'u') {
1344                         //                          currentPosition++;
1345                         //                        }
1346                         //                        if ((c1 =
1347                         //                          Character.getNumericValue(source[currentPosition++]))
1348                         //                          > 15
1349                         //                          || c1 < 0
1350                         //                          || (c2 =
1351                         //                            Character.getNumericValue(
1352                         //                              source[currentPosition++]))
1353                         //                            > 15
1354                         //                          || c2 < 0
1355                         //                          || (c3 =
1356                         //                            Character.getNumericValue(
1357                         //                              source[currentPosition++]))
1358                         //                            > 15
1359                         //                          || c3 < 0
1360                         //                          || (c4 =
1361                         //                            Character.getNumericValue(
1362                         //                              source[currentPosition++]))
1363                         //                            > 15
1364                         //                          || c4 < 0) {
1365                         //                          throw new
1366                         // InvalidInputException(INVALID_UNICODE_ESCAPE);
1367                         //                        } else {
1368                         //                          currentCharacter =
1369                         //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1370                         //                        }
1371                         //                      }
1372                         //handle the \\u case manually into comment
1373                         //                      if (currentCharacter == '\\') {
1374                         //                        if (source[currentPosition] == '\\')
1375                         //                          currentPosition++;
1376                         //                      } //jump over the \\
1377                       }
1378                       if (isUnicode) {
1379                         endPositionForLineComment = currentPosition - 6;
1380                       } else {
1381                         endPositionForLineComment = currentPosition - 1;
1382                       }
1383                       recordComment(false);
1384                       if ((currentCharacter == '\r')
1385                           || (currentCharacter == '\n')) {
1386                         checkNonExternalizeString();
1387                         if (recordLineSeparator) {
1388                           if (isUnicode) {
1389                             pushUnicodeLineSeparator();
1390                           } else {
1391                             pushLineSeparator();
1392                           }
1393                         } else {
1394                           currentLine = null;
1395                         }
1396                       }
1397                       if (tokenizeComments) {
1398                         if (!isUnicode) {
1399                           currentPosition = endPositionForLineComment;
1400                           // reset one character behind
1401                         }
1402                         return TokenNameCOMMENT_LINE;
1403                       }
1404                     } catch (IndexOutOfBoundsException e) { //an eof will them
1405                       // be generated
1406                       if (tokenizeComments) {
1407                         currentPosition--;
1408                         // reset one character behind
1409                         return TokenNameCOMMENT_LINE;
1410                       }
1411                     }
1412                     break;
1413                   }
1414                   if (test > 0) {
1415                     //traditional and annotation comment
1416                     boolean isJavadoc = false, star = false;
1417                     // consume next character
1418                     unicodeAsBackSlash = false;
1419                     currentCharacter = source[currentPosition++];
1420                     //                  if (((currentCharacter = source[currentPosition++]) ==
1421                     // '\\')
1422                     //                    && (source[currentPosition] == 'u')) {
1423                     //                    getNextUnicodeChar();
1424                     //                  } else {
1425                     //                    if (withoutUnicodePtr != 0) {
1426                     //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1427                     //                        currentCharacter;
1428                     //                    }
1429                     //                  }
1430                     if (currentCharacter == '*') {
1431                       isJavadoc = true;
1432                       star = true;
1433                     }
1434                     if ((currentCharacter == '\r')
1435                         || (currentCharacter == '\n')) {
1436                       checkNonExternalizeString();
1437                       if (recordLineSeparator) {
1438                         pushLineSeparator();
1439                       } else {
1440                         currentLine = null;
1441                       }
1442                     }
1443                     try { //get the next char
1444                       currentCharacter = source[currentPosition++];
1445                       //                    if (((currentCharacter = source[currentPosition++])
1446                       //                      == '\\')
1447                       //                      && (source[currentPosition] == 'u')) {
1448                       //                      //-------------unicode traitement ------------
1449                       //                      getNextUnicodeChar();
1450                       //                    }
1451                       //handle the \\u case manually into comment
1452                       //                    if (currentCharacter == '\\') {
1453                       //                      if (source[currentPosition] == '\\')
1454                       //                        currentPosition++;
1455                       //                      //jump over the \\
1456                       //                    }
1457                       // empty comment is not a javadoc /**/
1458                       if (currentCharacter == '/') {
1459                         isJavadoc = false;
1460                       }
1461                       //loop until end of comment */
1462                       while ((currentCharacter != '/') || (!star)) {
1463                         if ((currentCharacter == '\r')
1464                             || (currentCharacter == '\n')) {
1465                           checkNonExternalizeString();
1466                           if (recordLineSeparator) {
1467                             pushLineSeparator();
1468                           } else {
1469                             currentLine = null;
1470                           }
1471                         }
1472                         star = currentCharacter == '*';
1473                         //get next char
1474                         currentCharacter = source[currentPosition++];
1475                         //                      if (((currentCharacter = source[currentPosition++])
1476                         //                        == '\\')
1477                         //                        && (source[currentPosition] == 'u')) {
1478                         //                        //-------------unicode traitement ------------
1479                         //                        getNextUnicodeChar();
1480                         //                      }
1481                         //handle the \\u case manually into comment
1482                         //                      if (currentCharacter == '\\') {
1483                         //                        if (source[currentPosition] == '\\')
1484                         //                          currentPosition++;
1485                         //                      } //jump over the \\
1486                       }
1487                       recordComment(isJavadoc);
1488                       if (tokenizeComments) {
1489                         if (isJavadoc)
1490                           return TokenNameCOMMENT_PHPDOC;
1491                         return TokenNameCOMMENT_BLOCK;
1492                       }
1493                     } catch (IndexOutOfBoundsException e) {
1494                       throw new InvalidInputException(UNTERMINATED_COMMENT);
1495                     }
1496                     break;
1497                   }
1498                   return TokenNameDIVIDE;
1499                 }
1500               case '\u001a' :
1501                 if (atEnd())
1502                   return TokenNameEOF;
1503                 //the atEnd may not be <currentPosition == source.length> if
1504                 // source is only some part of a real (external) stream
1505                 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1506               default :
1507                 if (currentCharacter == '$') {
1508                   int oldPosition = currentPosition;
1509                   try {
1510                     currentCharacter = source[currentPosition++];
1511                     if (isPHPIdentifierStart(currentCharacter)) {
1512                       return scanIdentifierOrKeyword(true);
1513                     } else {
1514                       currentPosition = oldPosition;
1515                       return TokenNameDOLLAR;
1516                     }
1517                   } catch (IndexOutOfBoundsException e) {
1518                     currentPosition = oldPosition;
1519                     return TokenNameDOLLAR;
1520                   }
1521                 }
1522                 if (isPHPIdentifierStart(currentCharacter))
1523                   return scanIdentifierOrKeyword(false);
1524                 if (Character.isDigit(currentCharacter))
1525                   return scanNumber(false);
1526                 return TokenNameERROR;
1527             }
1528
1529         }
1530       } //-----------------end switch while try--------------------
1531       catch (IndexOutOfBoundsException e) {
1532       }
1533     }
1534     return TokenNameEOF;
1535   }
1536   /**
1537    * Scans inline HTML from the current position until a PHP open tag is
        * found or the source is exhausted, then switches the scanner into PHP
        * mode.
        * <p>
        * Two open-tag forms are recognised: {@code <?} followed by a whitespace
        * character, and {@code <?} followed by the letters p, h, p, each
        * accepted in either case. Line terminators met while scanning are
        * recorded through {@code pushLineSeparator()} when
        * {@code recordLineSeparator} is set; otherwise {@code currentLine} is
        * cleared.
        *
        * @param start
        *          value stored into {@code startPosition} as the start of the
        *          returned token
        * @return {@code TokenNameEOF} when {@code currentPosition} is already
        *         past the end of {@code source} on entry; otherwise
        *         {@code TokenNameINLINE_HTML}, with {@code phpMode} set to
        *         {@code true}
1538    * @throws InvalidInputException
        *           declared by the signature; no statement in this method
        *           throws it directly
1539    */
1540   private int getInlinedHTML(int start) throws InvalidInputException {
1541     //    int htmlPosition = start;
         // Cursor already beyond the buffer: clamp it and report end of input.
1542     if (currentPosition > source.length) {
1543       currentPosition = source.length;
1544       return TokenNameEOF;
1545     }
1546     startPosition = start;
1547     try {
           // Reading past the end of source raises IndexOutOfBoundsException,
           // which is the end-of-input exit of this loop (see the catch below).
1548       while (!phpMode) {
1549         currentCharacter = source[currentPosition++];
1550         if (currentCharacter == '<') {
               // presumably getNextChar(char) advances past the next character
               // only when it matches - TODO confirm against its definition
1551           if (getNextChar('?')) {
                 // The character after "<?" is consumed unconditionally.
1552             currentCharacter = source[currentPosition++];
                 // The explicit ' ' comparison is already covered by
                 // Character.isWhitespace, which is true for a space.
1553             if ((currentCharacter == ' ')
1554                 || Character.isWhitespace(currentCharacter)) {
1555               // "<?" followed by whitespace: short open tag
                   // NOTE(review): when that whitespace is '\r' or '\n' the
                   // method returns here, so the line-separator bookkeeping at
                   // the bottom of the loop never runs for it - confirm that
                   // the caller accounts for this line break.
1556               phpMode = true;
1557               return TokenNameINLINE_HTML;
1558             } else {
1559               boolean phpStart = (currentCharacter == 'P')
1560                   || (currentCharacter == 'p');
1561               if (phpStart) {
                     // presumably getNextChar(char, char) returns a
                     // non-negative value when the next character equals
                     // either argument, and a negative value otherwise -
                     // TODO confirm against its definition
1562                 int test = getNextChar('H', 'h');
1563                 if (test >= 0) {
1564                   test = getNextChar('P', 'p');
1565                   if (test >= 0) {
1566                     // "<?php" matched, each letter in either case
1567                     phpMode = true;
1568                     return TokenNameINLINE_HTML;
1569                   }
1570                 }
1571               }
1572             }
1573           }
1574         }
             // Not an open tag: keep scanning, tracking line ends on the way.
1575         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1576           if (recordLineSeparator) {
1577             pushLineSeparator();
1578           } else {
1579             currentLine = null;
1580           }
1581         }
1582       } //-----------------while--------------------
           // Reached only when phpMode was already true on entry.
1583       phpMode = true;
1584       return TokenNameINLINE_HTML;
1585     } //-----------------try--------------------
1586     catch (IndexOutOfBoundsException e) {
           // End of source reached without finding an open tag. A failed
           // source[currentPosition++] read has already advanced the cursor,
           // so step back one position. Whether the same holds when the
           // exception comes from getNextChar is not visible here.
1587       startPosition = start;
1588       currentPosition--;
1589     }
         // End-of-input path: the remaining text is still returned as inline
         // HTML and the scanner is left in PHP mode.
1590     phpMode = true;
1591     return TokenNameINLINE_HTML;
1592   }
1593   //  public final void getNextUnicodeChar()
1594   //    throws IndexOutOfBoundsException, InvalidInputException {
1595   //    //VOID
1596   //    //handle the case of unicode.
1597   //    //when a unicode appears then we must use a buffer that holds char
1598   // internal values
1599   //    //At the end of this method currentCharacter holds the new visited char
1600   //    //and currentPosition points right next after it
1601   //
1602   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1603   //
1604   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1605   //    currentPosition++;
1606   //    while (source[currentPosition] == 'u') {
1607   //      currentPosition++;
1608   //      unicodeSize++;
1609   //    }
1610   //
1611   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1612   //      || c1 < 0
1613   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1614   //      || c2 < 0
1615   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1616   //      || c3 < 0
1617   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1618   //      || c4 < 0) {
1619   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1620   //    } else {
1621   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1622   //      //need the unicode buffer
1623   //      if (withoutUnicodePtr == 0) {
1624   //        //buffer all the entries that have been left aside....
1625   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1626   //        System.arraycopy(
1627   //          source,
1628   //          startPosition,
1629   //          withoutUnicodeBuffer,
1630   //          1,
1631   //          withoutUnicodePtr);
1632   //      }
1633   //      //fill the buffer with the char
1634   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1635   //    }
1636   //    unicodeAsBackSlash = currentCharacter == '\\';
1637   //  }
1638   /*
1639    * Tokenize a method body, assuming that curly brackets are properly
1640    * balanced.
1641    */
1642   public final void jumpOverMethodBody() {
1643     this.wasAcr = false;
1644     int found = 1;
1645     try {
1646       while (true) { //loop for jumping over comments
1647         // ---------Consume white space and handles startPosition---------
1648         boolean isWhiteSpace;
1649         do {
1650           startPosition = currentPosition;
1651           currentCharacter = source[currentPosition++];
1652           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1653           //            && (source[currentPosition] == 'u')) {
1654           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1655           //          } else {
1656           if (recordLineSeparator
1657               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1658             pushLineSeparator();
1659           isWhiteSpace = Character.isWhitespace(currentCharacter);
1660           //          }
1661         } while (isWhiteSpace);
1662         // -------consume token until } is found---------
1663         switch (currentCharacter) {
1664           case '{' :
1665             found++;
1666             break;
1667           case '}' :
1668             found--;
1669             if (found == 0)
1670               return;
1671             break;
1672           case '\'' :
1673             {
1674               boolean test;
1675               test = getNextChar('\\');
1676               if (test) {
1677                 try {
1678                   scanDoubleQuotedEscapeCharacter();
1679                 } catch (InvalidInputException ex) {
1680                 };
1681               } else {
1682                 //                try { // consume next character
1683                 unicodeAsBackSlash = false;
1684                 currentCharacter = source[currentPosition++];
1685                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1686                 //                    && (source[currentPosition] == 'u')) {
1687                 //                    getNextUnicodeChar();
1688                 //                  } else {
1689                 if (withoutUnicodePtr != 0) {
1690                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1691                 }
1692                 //                  }
1693                 //                } catch (InvalidInputException ex) {
1694                 //                };
1695               }
1696               getNextChar('\'');
1697               break;
1698             }
1699           case '"' :
1700             try {
1701               //              try { // consume next character
1702               unicodeAsBackSlash = false;
1703               currentCharacter = source[currentPosition++];
1704               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1705               //                  && (source[currentPosition] == 'u')) {
1706               //                  getNextUnicodeChar();
1707               //                } else {
1708               if (withoutUnicodePtr != 0) {
1709                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1710               }
1711               //                }
1712               //              } catch (InvalidInputException ex) {
1713               //              };
1714               while (currentCharacter != '"') {
1715                 if (currentCharacter == '\r') {
1716                   if (source[currentPosition] == '\n')
1717                     currentPosition++;
1718                   break;
1719                   // the string cannot go further that the line
1720                 }
1721                 if (currentCharacter == '\n') {
1722                   break;
1723                   // the string cannot go further that the line
1724                 }
1725                 if (currentCharacter == '\\') {
1726                   try {
1727                     scanDoubleQuotedEscapeCharacter();
1728                   } catch (InvalidInputException ex) {
1729                   };
1730                 }
1731                 //                try { // consume next character
1732                 unicodeAsBackSlash = false;
1733                 currentCharacter = source[currentPosition++];
1734                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1735                 //                    && (source[currentPosition] == 'u')) {
1736                 //                    getNextUnicodeChar();
1737                 //                  } else {
1738                 if (withoutUnicodePtr != 0) {
1739                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1740                 }
1741                 //                  }
1742                 //                } catch (InvalidInputException ex) {
1743                 //                };
1744               }
1745             } catch (IndexOutOfBoundsException e) {
1746               return;
1747             }
1748             break;
1749           case '/' :
1750             {
1751               int test;
1752               if ((test = getNextChar('/', '*')) == 0) {
1753                 //line comment
1754                 try {
1755                   //get the next char
1756                   currentCharacter = source[currentPosition++];
1757                   //                  if (((currentCharacter = source[currentPosition++]) ==
1758                   // '\\')
1759                   //                    && (source[currentPosition] == 'u')) {
1760                   //                    //-------------unicode traitement ------------
1761                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1762                   //                    currentPosition++;
1763                   //                    while (source[currentPosition] == 'u') {
1764                   //                      currentPosition++;
1765                   //                    }
1766                   //                    if ((c1 =
1767                   //                      Character.getNumericValue(source[currentPosition++]))
1768                   //                      > 15
1769                   //                      || c1 < 0
1770                   //                      || (c2 =
1771                   //                        Character.getNumericValue(source[currentPosition++]))
1772                   //                        > 15
1773                   //                      || c2 < 0
1774                   //                      || (c3 =
1775                   //                        Character.getNumericValue(source[currentPosition++]))
1776                   //                        > 15
1777                   //                      || c3 < 0
1778                   //                      || (c4 =
1779                   //                        Character.getNumericValue(source[currentPosition++]))
1780                   //                        > 15
1781                   //                      || c4 < 0) {
1782                   //                      //error don't care of the value
1783                   //                      currentCharacter = 'A';
1784                   //                    } //something different from \n and \r
1785                   //                    else {
1786                   //                      currentCharacter =
1787                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1788                   //                    }
1789                   //                  }
1790                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1791                     //get the next char
1792                     currentCharacter = source[currentPosition++];
1793                     //                    if (((currentCharacter = source[currentPosition++])
1794                     //                      == '\\')
1795                     //                      && (source[currentPosition] == 'u')) {
1796                     //                      //-------------unicode traitement ------------
1797                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1798                     //                      currentPosition++;
1799                     //                      while (source[currentPosition] == 'u') {
1800                     //                        currentPosition++;
1801                     //                      }
1802                     //                      if ((c1 =
1803                     //                        Character.getNumericValue(source[currentPosition++]))
1804                     //                        > 15
1805                     //                        || c1 < 0
1806                     //                        || (c2 =
1807                     //                          Character.getNumericValue(source[currentPosition++]))
1808                     //                          > 15
1809                     //                        || c2 < 0
1810                     //                        || (c3 =
1811                     //                          Character.getNumericValue(source[currentPosition++]))
1812                     //                          > 15
1813                     //                        || c3 < 0
1814                     //                        || (c4 =
1815                     //                          Character.getNumericValue(source[currentPosition++]))
1816                     //                          > 15
1817                     //                        || c4 < 0) {
1818                     //                        //error don't care of the value
1819                     //                        currentCharacter = 'A';
1820                     //                      } //something different from \n and \r
1821                     //                      else {
1822                     //                        currentCharacter =
1823                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1824                     //                      }
1825                     //                    }
1826                   }
1827                   if (recordLineSeparator
1828                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1829                     pushLineSeparator();
1830                 } catch (IndexOutOfBoundsException e) {
1831                 } //an eof will them be generated
1832                 break;
1833               }
1834               if (test > 0) {
1835                 //traditional and annotation comment
1836                 boolean star = false;
1837                 //                try { // consume next character
1838                 unicodeAsBackSlash = false;
1839                 currentCharacter = source[currentPosition++];
1840                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1841                 //                    && (source[currentPosition] == 'u')) {
1842                 //                    getNextUnicodeChar();
1843                 //                  } else {
1844                 if (withoutUnicodePtr != 0) {
1845                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1846                 }
1847                 //                  };
1848                 //                } catch (InvalidInputException ex) {
1849                 //                };
1850                 if (currentCharacter == '*') {
1851                   star = true;
1852                 }
1853                 if (recordLineSeparator
1854                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1855                   pushLineSeparator();
1856                 try { //get the next char
1857                   currentCharacter = source[currentPosition++];
1858                   //                  if (((currentCharacter = source[currentPosition++]) ==
1859                   // '\\')
1860                   //                    && (source[currentPosition] == 'u')) {
1861                   //                    //-------------unicode traitement ------------
1862                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863                   //                    currentPosition++;
1864                   //                    while (source[currentPosition] == 'u') {
1865                   //                      currentPosition++;
1866                   //                    }
1867                   //                    if ((c1 =
1868                   //                      Character.getNumericValue(source[currentPosition++]))
1869                   //                      > 15
1870                   //                      || c1 < 0
1871                   //                      || (c2 =
1872                   //                        Character.getNumericValue(source[currentPosition++]))
1873                   //                        > 15
1874                   //                      || c2 < 0
1875                   //                      || (c3 =
1876                   //                        Character.getNumericValue(source[currentPosition++]))
1877                   //                        > 15
1878                   //                      || c3 < 0
1879                   //                      || (c4 =
1880                   //                        Character.getNumericValue(source[currentPosition++]))
1881                   //                        > 15
1882                   //                      || c4 < 0) {
1883                   //                      //error don't care of the value
1884                   //                      currentCharacter = 'A';
1885                   //                    } //something different from * and /
1886                   //                    else {
1887                   //                      currentCharacter =
1888                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1889                   //                    }
1890                   //                  }
1891                   //loop until end of comment */
1892                   while ((currentCharacter != '/') || (!star)) {
1893                     if (recordLineSeparator
1894                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1895                       pushLineSeparator();
1896                     star = currentCharacter == '*';
1897                     //get next char
1898                     currentCharacter = source[currentPosition++];
1899                     //                    if (((currentCharacter = source[currentPosition++])
1900                     //                      == '\\')
1901                     //                      && (source[currentPosition] == 'u')) {
1902                     //                      //-------------unicode traitement ------------
1903                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1904                     //                      currentPosition++;
1905                     //                      while (source[currentPosition] == 'u') {
1906                     //                        currentPosition++;
1907                     //                      }
1908                     //                      if ((c1 =
1909                     //                        Character.getNumericValue(source[currentPosition++]))
1910                     //                        > 15
1911                     //                        || c1 < 0
1912                     //                        || (c2 =
1913                     //                          Character.getNumericValue(source[currentPosition++]))
1914                     //                          > 15
1915                     //                        || c2 < 0
1916                     //                        || (c3 =
1917                     //                          Character.getNumericValue(source[currentPosition++]))
1918                     //                          > 15
1919                     //                        || c3 < 0
1920                     //                        || (c4 =
1921                     //                          Character.getNumericValue(source[currentPosition++]))
1922                     //                          > 15
1923                     //                        || c4 < 0) {
1924                     //                        //error don't care of the value
1925                     //                        currentCharacter = 'A';
1926                     //                      } //something different from * and /
1927                     //                      else {
1928                     //                        currentCharacter =
1929                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1930                     //                      }
1931                     //                    }
1932                   }
1933                 } catch (IndexOutOfBoundsException e) {
1934                   return;
1935                 }
1936                 break;
1937               }
1938               break;
1939             }
1940           default :
1941             if (isPHPIdentifierStart(currentCharacter)
1942                 || currentCharacter == '$') {
1943               try {
1944                 scanIdentifierOrKeyword((currentCharacter == '$'));
1945               } catch (InvalidInputException ex) {
1946               };
1947               break;
1948             }
1949             if (Character.isDigit(currentCharacter)) {
1950               try {
1951                 scanNumber(false);
1952               } catch (InvalidInputException ex) {
1953               };
1954               break;
1955             }
1956         }
1957       }
1958       //-----------------end switch while try--------------------
1959     } catch (IndexOutOfBoundsException e) {
1960     } catch (InvalidInputException e) {
1961     }
1962     return;
1963   }
1964   //  public final boolean jumpOverUnicodeWhiteSpace()
1965   //    throws InvalidInputException {
1966   //    //BOOLEAN
1967   //    //handle the case of unicode. Jump over the next whiteSpace
1968   //    //making startPosition pointing on the next available char
1969   //    //On false, the currentCharacter is filled up with a potential
1970   //    //correct char
1971   //
1972   //    try {
1973   //      this.wasAcr = false;
1974   //      int c1, c2, c3, c4;
1975   //      int unicodeSize = 6;
1976   //      currentPosition++;
1977   //      while (source[currentPosition] == 'u') {
1978   //        currentPosition++;
1979   //        unicodeSize++;
1980   //      }
1981   //
1982   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1983   //        || c1 < 0)
1984   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1985   //          || c2 < 0)
1986   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1987   //          || c3 < 0)
1988   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1989   //          || c4 < 0)) {
1990   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1991   //      }
1992   //
1993   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1994   //      if (recordLineSeparator
1995   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1996   //        pushLineSeparator();
1997   //      if (Character.isWhitespace(currentCharacter))
1998   //        return true;
1999   //
2000   //      //buffer the new char which is not a white space
2001   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2002   //      //withoutUnicodePtr == 1 is true here
2003   //      return false;
2004   //    } catch (IndexOutOfBoundsException e) {
2005   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2006   //    }
2007   //  }
2008   public final int[] getLineEnds() {
2009     //return a bounded copy of this.lineEnds
2010     int[] copy;
2011     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2012     return copy;
2013   }
2014   public char[] getSource() {
2015     return this.source;
2016   }
2017   final char[] optimizedCurrentTokenSource1() {
2018     //return always the same char[] build only once
2019     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2020     char charOne = source[startPosition];
2021     switch (charOne) {
2022       case 'a' :
2023         return charArray_a;
2024       case 'b' :
2025         return charArray_b;
2026       case 'c' :
2027         return charArray_c;
2028       case 'd' :
2029         return charArray_d;
2030       case 'e' :
2031         return charArray_e;
2032       case 'f' :
2033         return charArray_f;
2034       case 'g' :
2035         return charArray_g;
2036       case 'h' :
2037         return charArray_h;
2038       case 'i' :
2039         return charArray_i;
2040       case 'j' :
2041         return charArray_j;
2042       case 'k' :
2043         return charArray_k;
2044       case 'l' :
2045         return charArray_l;
2046       case 'm' :
2047         return charArray_m;
2048       case 'n' :
2049         return charArray_n;
2050       case 'o' :
2051         return charArray_o;
2052       case 'p' :
2053         return charArray_p;
2054       case 'q' :
2055         return charArray_q;
2056       case 'r' :
2057         return charArray_r;
2058       case 's' :
2059         return charArray_s;
2060       case 't' :
2061         return charArray_t;
2062       case 'u' :
2063         return charArray_u;
2064       case 'v' :
2065         return charArray_v;
2066       case 'w' :
2067         return charArray_w;
2068       case 'x' :
2069         return charArray_x;
2070       case 'y' :
2071         return charArray_y;
2072       case 'z' :
2073         return charArray_z;
2074       default :
2075         return new char[]{charOne};
2076     }
2077   }
2078   final char[] optimizedCurrentTokenSource2() {
2079     //try to return the same char[] build only once
2080     char c0, c1;
2081     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2082         % TableSize;
2083     char[][] table = charArray_length[0][hash];
2084     int i = newEntry2;
2085     while (++i < InternalTableSize) {
2086       char[] charArray = table[i];
2087       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2088         return charArray;
2089     }
2090     //---------other side---------
2091     i = -1;
2092     int max = newEntry2;
2093     while (++i <= max) {
2094       char[] charArray = table[i];
2095       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2096         return charArray;
2097     }
2098     //--------add the entry-------
2099     if (++max >= InternalTableSize)
2100       max = 0;
2101     char[] r;
2102     table[max] = (r = new char[]{c0, c1});
2103     newEntry2 = max;
2104     return r;
2105   }
2106   final char[] optimizedCurrentTokenSource3() {
2107     //try to return the same char[] build only once
2108     char c0, c1, c2;
2109     int hash = (((c0 = source[startPosition]) << 12)
2110         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2111         % TableSize;
2112     char[][] table = charArray_length[1][hash];
2113     int i = newEntry3;
2114     while (++i < InternalTableSize) {
2115       char[] charArray = table[i];
2116       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2117         return charArray;
2118     }
2119     //---------other side---------
2120     i = -1;
2121     int max = newEntry3;
2122     while (++i <= max) {
2123       char[] charArray = table[i];
2124       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2125         return charArray;
2126     }
2127     //--------add the entry-------
2128     if (++max >= InternalTableSize)
2129       max = 0;
2130     char[] r;
2131     table[max] = (r = new char[]{c0, c1, c2});
2132     newEntry3 = max;
2133     return r;
2134   }
2135   final char[] optimizedCurrentTokenSource4() {
2136     //try to return the same char[] build only once
2137     char c0, c1, c2, c3;
2138     long hash = ((((long) (c0 = source[startPosition])) << 18)
2139         + ((c1 = source[startPosition + 1]) << 12)
2140         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2141         % TableSize;
2142     char[][] table = charArray_length[2][(int) hash];
2143     int i = newEntry4;
2144     while (++i < InternalTableSize) {
2145       char[] charArray = table[i];
2146       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2147           && (c3 == charArray[3]))
2148         return charArray;
2149     }
2150     //---------other side---------
2151     i = -1;
2152     int max = newEntry4;
2153     while (++i <= max) {
2154       char[] charArray = table[i];
2155       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2156           && (c3 == charArray[3]))
2157         return charArray;
2158     }
2159     //--------add the entry-------
2160     if (++max >= InternalTableSize)
2161       max = 0;
2162     char[] r;
2163     table[max] = (r = new char[]{c0, c1, c2, c3});
2164     newEntry4 = max;
2165     return r;
2166   }
2167   final char[] optimizedCurrentTokenSource5() {
2168     //try to return the same char[] build only once
2169     char c0, c1, c2, c3, c4;
2170     long hash = ((((long) (c0 = source[startPosition])) << 24)
2171         + (((long) (c1 = source[startPosition + 1])) << 18)
2172         + ((c2 = source[startPosition + 2]) << 12)
2173         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2174         % TableSize;
2175     char[][] table = charArray_length[3][(int) hash];
2176     int i = newEntry5;
2177     while (++i < InternalTableSize) {
2178       char[] charArray = table[i];
2179       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2180           && (c3 == charArray[3]) && (c4 == charArray[4]))
2181         return charArray;
2182     }
2183     //---------other side---------
2184     i = -1;
2185     int max = newEntry5;
2186     while (++i <= max) {
2187       char[] charArray = table[i];
2188       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2189           && (c3 == charArray[3]) && (c4 == charArray[4]))
2190         return charArray;
2191     }
2192     //--------add the entry-------
2193     if (++max >= InternalTableSize)
2194       max = 0;
2195     char[] r;
2196     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2197     newEntry5 = max;
2198     return r;
2199   }
2200   final char[] optimizedCurrentTokenSource6() {
2201     //try to return the same char[] build only once
2202     char c0, c1, c2, c3, c4, c5;
2203     long hash = ((((long) (c0 = source[startPosition])) << 32)
2204         + (((long) (c1 = source[startPosition + 1])) << 24)
2205         + (((long) (c2 = source[startPosition + 2])) << 18)
2206         + ((c3 = source[startPosition + 3]) << 12)
2207         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2208         % TableSize;
2209     char[][] table = charArray_length[4][(int) hash];
2210     int i = newEntry6;
2211     while (++i < InternalTableSize) {
2212       char[] charArray = table[i];
2213       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2214           && (c3 == charArray[3]) && (c4 == charArray[4])
2215           && (c5 == charArray[5]))
2216         return charArray;
2217     }
2218     //---------other side---------
2219     i = -1;
2220     int max = newEntry6;
2221     while (++i <= max) {
2222       char[] charArray = table[i];
2223       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2224           && (c3 == charArray[3]) && (c4 == charArray[4])
2225           && (c5 == charArray[5]))
2226         return charArray;
2227     }
2228     //--------add the entry-------
2229     if (++max >= InternalTableSize)
2230       max = 0;
2231     char[] r;
2232     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2233     newEntry6 = max;
2234     return r;
2235   }
2236   public final void pushLineSeparator() throws InvalidInputException {
2237     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2238     final int INCREMENT = 250;
2239     if (this.checkNonExternalizedStringLiterals) {
2240       // reinitialize the current line for non externalize strings purpose
2241       currentLine = null;
2242     }
2243     //currentCharacter is at position currentPosition-1
2244     // cr 000D
2245     if (currentCharacter == '\r') {
2246       int separatorPos = currentPosition - 1;
2247       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2248         return;
2249       //System.out.println("CR-" + separatorPos);
2250       try {
2251         lineEnds[++linePtr] = separatorPos;
2252       } catch (IndexOutOfBoundsException e) {
2253         //linePtr value is correct
2254         int oldLength = lineEnds.length;
2255         int[] old = lineEnds;
2256         lineEnds = new int[oldLength + INCREMENT];
2257         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2258         lineEnds[linePtr] = separatorPos;
2259       }
2260       // look-ahead for merged cr+lf
2261       try {
2262         if (source[currentPosition] == '\n') {
2263           //System.out.println("look-ahead LF-" + currentPosition);
2264           lineEnds[linePtr] = currentPosition;
2265           currentPosition++;
2266           wasAcr = false;
2267         } else {
2268           wasAcr = true;
2269         }
2270       } catch (IndexOutOfBoundsException e) {
2271         wasAcr = true;
2272       }
2273     } else {
2274       // lf 000A
2275       if (currentCharacter == '\n') {
2276         //must merge eventual cr followed by lf
2277         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2278           //System.out.println("merge LF-" + (currentPosition - 1));
2279           lineEnds[linePtr] = currentPosition - 1;
2280         } else {
2281           int separatorPos = currentPosition - 1;
2282           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2283             return;
2284           // System.out.println("LF-" + separatorPos);
2285           try {
2286             lineEnds[++linePtr] = separatorPos;
2287           } catch (IndexOutOfBoundsException e) {
2288             //linePtr value is correct
2289             int oldLength = lineEnds.length;
2290             int[] old = lineEnds;
2291             lineEnds = new int[oldLength + INCREMENT];
2292             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2293             lineEnds[linePtr] = separatorPos;
2294           }
2295         }
2296         wasAcr = false;
2297       }
2298     }
2299   }
2300   public final void pushUnicodeLineSeparator() {
2301     // isUnicode means that the \r or \n has been read as a unicode character
2302     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2303     final int INCREMENT = 250;
2304     //currentCharacter is at position currentPosition-1
2305     if (this.checkNonExternalizedStringLiterals) {
2306       // reinitialize the current line for non externalize strings purpose
2307       currentLine = null;
2308     }
2309     // cr 000D
2310     if (currentCharacter == '\r') {
2311       int separatorPos = currentPosition - 6;
2312       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2313         return;
2314       //System.out.println("CR-" + separatorPos);
2315       try {
2316         lineEnds[++linePtr] = separatorPos;
2317       } catch (IndexOutOfBoundsException e) {
2318         //linePtr value is correct
2319         int oldLength = lineEnds.length;
2320         int[] old = lineEnds;
2321         lineEnds = new int[oldLength + INCREMENT];
2322         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2323         lineEnds[linePtr] = separatorPos;
2324       }
2325       // look-ahead for merged cr+lf
2326       if (source[currentPosition] == '\n') {
2327         //System.out.println("look-ahead LF-" + currentPosition);
2328         lineEnds[linePtr] = currentPosition;
2329         currentPosition++;
2330         wasAcr = false;
2331       } else {
2332         wasAcr = true;
2333       }
2334     } else {
2335       // lf 000A
2336       if (currentCharacter == '\n') {
2337         //must merge eventual cr followed by lf
2338         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2339           //System.out.println("merge LF-" + (currentPosition - 1));
2340           lineEnds[linePtr] = currentPosition - 6;
2341         } else {
2342           int separatorPos = currentPosition - 6;
2343           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2344             return;
2345           // System.out.println("LF-" + separatorPos);
2346           try {
2347             lineEnds[++linePtr] = separatorPos;
2348           } catch (IndexOutOfBoundsException e) {
2349             //linePtr value is correct
2350             int oldLength = lineEnds.length;
2351             int[] old = lineEnds;
2352             lineEnds = new int[oldLength + INCREMENT];
2353             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2354             lineEnds[linePtr] = separatorPos;
2355           }
2356         }
2357         wasAcr = false;
2358       }
2359     }
2360   }
2361   public final void recordComment(boolean isJavadoc) {
2362     // a new annotation comment is recorded
2363     try {
2364       commentStops[++commentPtr] = isJavadoc
2365           ? currentPosition
2366           : -currentPosition;
2367     } catch (IndexOutOfBoundsException e) {
2368       int oldStackLength = commentStops.length;
2369       int[] oldStack = commentStops;
2370       commentStops = new int[oldStackLength + 30];
2371       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2372       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2373       //grows the positions buffers too
2374       int[] old = commentStarts;
2375       commentStarts = new int[oldStackLength + 30];
2376       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2377     }
2378     //the buffer is of a correct size here
2379     commentStarts[commentPtr] = startPosition;
2380   }
2381   public void resetTo(int begin, int end) {
2382     //reset the scanner to a given position where it may rescan again
2383     diet = false;
2384     initialPosition = startPosition = currentPosition = begin;
2385     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2386     commentPtr = -1; // reset comment stack
2387   }
2388   public final void scanSingleQuotedEscapeCharacter()
2389       throws InvalidInputException {
2390     // the string with "\\u" is a legal string of two chars \ and u
2391     //thus we use a direct access to the source (for regular cases).
2392     //    if (unicodeAsBackSlash) {
2393     //      // consume next character
2394     //      unicodeAsBackSlash = false;
2395     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2396     //        && (source[currentPosition] == 'u')) {
2397     //        getNextUnicodeChar();
2398     //      } else {
2399     //        if (withoutUnicodePtr != 0) {
2400     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2401     //        }
2402     //      }
2403     //    } else
2404     currentCharacter = source[currentPosition++];
2405     switch (currentCharacter) {
2406       case '\'' :
2407         currentCharacter = '\'';
2408         break;
2409       case '\\' :
2410         currentCharacter = '\\';
2411         break;
2412       default :
2413         currentCharacter = '\\';
2414         currentPosition--;
2415     }
2416   }
2417   public final void scanDoubleQuotedEscapeCharacter()
2418       throws InvalidInputException {
2419     // the string with "\\u" is a legal string of two chars \ and u
2420     //thus we use a direct access to the source (for regular cases).
2421     //    if (unicodeAsBackSlash) {
2422     //      // consume next character
2423     //      unicodeAsBackSlash = false;
2424     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2425     //        && (source[currentPosition] == 'u')) {
2426     //        getNextUnicodeChar();
2427     //      } else {
2428     //        if (withoutUnicodePtr != 0) {
2429     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2430     //        }
2431     //      }
2432     //    } else
2433     currentCharacter = source[currentPosition++];
2434     switch (currentCharacter) {
2435       //      case 'b' :
2436       //        currentCharacter = '\b';
2437       //        break;
2438       case 't' :
2439         currentCharacter = '\t';
2440         break;
2441       case 'n' :
2442         currentCharacter = '\n';
2443         break;
2444       //      case 'f' :
2445       //        currentCharacter = '\f';
2446       //        break;
2447       case 'r' :
2448         currentCharacter = '\r';
2449         break;
2450       case '\"' :
2451         currentCharacter = '\"';
2452         break;
2453       case '\'' :
2454         currentCharacter = '\'';
2455         break;
2456       case '\\' :
2457         currentCharacter = '\\';
2458         break;
2459       case '$' :
2460         currentCharacter = '$';
2461         break;
2462       default :
2463         // -----------octal escape--------------
2464         // OctalDigit
2465         // OctalDigit OctalDigit
2466         // ZeroToThree OctalDigit OctalDigit
2467         int number = Character.getNumericValue(currentCharacter);
2468         if (number >= 0 && number <= 7) {
2469           boolean zeroToThreeNot = number > 3;
2470           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2471             int digit = Character.getNumericValue(currentCharacter);
2472             if (digit >= 0 && digit <= 7) {
2473               number = (number * 8) + digit;
2474               if (Character
2475                   .isDigit(currentCharacter = source[currentPosition++])) {
2476                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2477                   // Digit --> ignore last character
2478                   currentPosition--;
2479                 } else {
2480                   digit = Character.getNumericValue(currentCharacter);
2481                   if (digit >= 0 && digit <= 7) {
2482                     // has read \ZeroToThree OctalDigit OctalDigit
2483                     number = (number * 8) + digit;
2484                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2485                     // --> ignore last character
2486                     currentPosition--;
2487                   }
2488                 }
2489               } else { // has read \OctalDigit NonDigit--> ignore last
2490                 // character
2491                 currentPosition--;
2492               }
2493             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2494               // character
2495               currentPosition--;
2496             }
2497           } else { // has read \OctalDigit --> ignore last character
2498             currentPosition--;
2499           }
2500           if (number > 255)
2501             throw new InvalidInputException(INVALID_ESCAPE);
2502           currentCharacter = (char) number;
2503         }
2504     //else
2505     //     throw new InvalidInputException(INVALID_ESCAPE);
2506     }
2507   }
2508   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2509   //    return scanIdentifierOrKeyword( false );
2510   //  }
2511   public int scanIdentifierOrKeyword(boolean isVariable)
2512       throws InvalidInputException {
2513     //test keywords
2514     //first dispatch on the first char.
2515     //then the length. If there are several
2516     //keywors with the same length AND the same first char, then do another
2517     //disptach on the second char :-)...cool....but fast !
2518     useAssertAsAnIndentifier = false;
2519     while (getNextCharAsJavaIdentifierPart()) {
2520     };
2521     if (isVariable) {
2522       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2523       //        return TokenNamethis;
2524       //      }
2525       return TokenNameVariable;
2526     }
2527     int index, length;
2528     char[] data;
2529     char firstLetter;
2530     //    if (withoutUnicodePtr == 0)
2531     //quick test on length == 1 but not on length > 12 while most identifier
2532     //have a length which is <= 12...but there are lots of identifier with
2533     //only one char....
2534     //      {
2535     if ((length = currentPosition - startPosition) == 1)
2536       return TokenNameIdentifier;
2537     //  data = source;
2538     data = new char[length];
2539     index = startPosition;
2540     for (int i = 0; i < length; i++) {
2541       data[i] = Character.toLowerCase(source[index + i]);
2542     }
2543     index = 0;
2544     //    } else {
2545     //      if ((length = withoutUnicodePtr) == 1)
2546     //        return TokenNameIdentifier;
2547     //      // data = withoutUnicodeBuffer;
2548     //      data = new char[withoutUnicodeBuffer.length];
2549     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2550     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2551     //      }
2552     //      index = 1;
2553     //    }
2554     firstLetter = data[index];
2555     switch (firstLetter) {
2556       case '_' :
2557         switch (length) {
2558           case 8 :
2559             //__FILE__
2560             if ((data[++index] == '_') && (data[++index] == 'f')
2561                 && (data[++index] == 'i') && (data[++index] == 'l')
2562                 && (data[++index] == 'e') && (data[++index] == '_')
2563                 && (data[++index] == '_'))
2564               return TokenNameFILE;
2565             index = 0; //__LINE__
2566             if ((data[++index] == '_') && (data[++index] == 'l')
2567                 && (data[++index] == 'i') && (data[++index] == 'n')
2568                 && (data[++index] == 'e') && (data[++index] == '_')
2569                 && (data[++index] == '_'))
2570               return TokenNameLINE;
2571             break;
2572           case 9 :
2573             //__CLASS__
2574             if ((data[++index] == '_') && (data[++index] == 'c')
2575                 && (data[++index] == 'l') && (data[++index] == 'a')
2576                 && (data[++index] == 's') && (data[++index] == 's')
2577                 && (data[++index] == '_') && (data[++index] == '_'))
2578               return TokenNameCLASS_C;
2579             break;
2580           case 11 :
2581             //__METHOD__
2582             if ((data[++index] == '_') && (data[++index] == 'm')
2583                 && (data[++index] == 'e') && (data[++index] == 't')
2584                 && (data[++index] == 'h') && (data[++index] == 'o')
2585                 && (data[++index] == 'd') && (data[++index] == '_')
2586                 && (data[++index] == '_'))
2587               return TokenNameMETHOD_C;
2588             break;
2589           case 12 :
2590             //__FUNCTION__
2591             if ((data[++index] == '_') && (data[++index] == 'f')
2592                 && (data[++index] == 'u') && (data[++index] == 'n')
2593                 && (data[++index] == 'c') && (data[++index] == 't')
2594                 && (data[++index] == 'i') && (data[++index] == 'o')
2595                 && (data[++index] == 'n') && (data[++index] == '_')
2596                 && (data[++index] == '_'))
2597               return TokenNameFUNC_C;
2598             break;
2599         }
2600         return TokenNameIdentifier;
2601       case 'a' :
2602         // as and array abstract
2603         switch (length) {
2604           case 2 :
2605             //as
2606             if ((data[++index] == 's')) {
2607               return TokenNameas;
2608             } else {
2609               return TokenNameIdentifier;
2610             }
2611           case 3 :
2612             //and
2613             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2614               return TokenNameand;
2615             } else {
2616               return TokenNameIdentifier;
2617             }
2618           case 5 :
2619             // array
2620             if ((data[++index] == 'r') && (data[++index] == 'r')
2621                 && (data[++index] == 'a') && (data[++index] == 'y'))
2622               return TokenNamearray;
2623             else
2624               return TokenNameIdentifier;
2625           case 8 :
2626             if ((data[++index] == 'b') && (data[++index] == 's')
2627                 && (data[++index] == 't') && (data[++index] == 'r')
2628                 && (data[++index] == 'a') && (data[++index] == 'c')
2629                 && (data[++index] == 't'))
2630               return TokenNameabstract;
2631             else
2632               return TokenNameIdentifier;
2633           default :
2634             return TokenNameIdentifier;
2635         }
2636       case 'b' :
2637         //break
2638         switch (length) {
2639           case 5 :
2640             if ((data[++index] == 'r') && (data[++index] == 'e')
2641                 && (data[++index] == 'a') && (data[++index] == 'k'))
2642               return TokenNamebreak;
2643             else
2644               return TokenNameIdentifier;
2645           default :
2646             return TokenNameIdentifier;
2647         }
2648       case 'c' :
2649         //case catch class clone const continue
2650         switch (length) {
2651           case 4 :
2652             if ((data[++index] == 'a') && (data[++index] == 's')
2653                 && (data[++index] == 'e'))
2654               return TokenNamecase;
2655             else
2656               return TokenNameIdentifier;
2657           case 5 :
2658             if ((data[++index] == 'a') && (data[++index] == 't')
2659                 && (data[++index] == 'c') && (data[++index] == 'h'))
2660               return TokenNamecatch;
2661             index = 0;
2662             if ((data[++index] == 'l') && (data[++index] == 'a')
2663                 && (data[++index] == 's') && (data[++index] == 's'))
2664               return TokenNameclass;
2665             index = 0;
2666             if ((data[++index] == 'l') && (data[++index] == 'o')
2667                 && (data[++index] == 'n') && (data[++index] == 'e'))
2668               return TokenNameclone;
2669             index = 0;
2670             if ((data[++index] == 'o') && (data[++index] == 'n')
2671                 && (data[++index] == 's') && (data[++index] == 't'))
2672               return TokenNameconst;
2673             else
2674               return TokenNameIdentifier;
2675           case 8 :
2676             if ((data[++index] == 'o') && (data[++index] == 'n')
2677                 && (data[++index] == 't') && (data[++index] == 'i')
2678                 && (data[++index] == 'n') && (data[++index] == 'u')
2679                 && (data[++index] == 'e'))
2680               return TokenNamecontinue;
2681             else
2682               return TokenNameIdentifier;
2683           default :
2684             return TokenNameIdentifier;
2685         }
2686       case 'd' :
2687         // declare default do die
2688         // TODO delete define ==> no keyword !
2689         switch (length) {
2690           case 2 :
2691             if ((data[++index] == 'o'))
2692               return TokenNamedo;
2693             else
2694               return TokenNameIdentifier;
2695           //          case 6 :
2696           //            if ((data[++index] == 'e')
2697           //              && (data[++index] == 'f')
2698           //              && (data[++index] == 'i')
2699           //              && (data[++index] == 'n')
2700           //              && (data[++index] == 'e'))
2701           //              return TokenNamedefine;
2702           //            else
2703           //              return TokenNameIdentifier;
2704           case 7 :
2705             if ((data[++index] == 'e') && (data[++index] == 'c')
2706                 && (data[++index] == 'l') && (data[++index] == 'a')
2707                 && (data[++index] == 'r') && (data[++index] == 'e'))
2708               return TokenNamedeclare;
2709             index = 0;
2710             if ((data[++index] == 'e') && (data[++index] == 'f')
2711                 && (data[++index] == 'a') && (data[++index] == 'u')
2712                 && (data[++index] == 'l') && (data[++index] == 't'))
2713               return TokenNamedefault;
2714             else
2715               return TokenNameIdentifier;
2716           default :
2717             return TokenNameIdentifier;
2718         }
2719       case 'e' :
2720         //echo else exit elseif extends eval
2721         switch (length) {
2722           case 4 :
2723             if ((data[++index] == 'c') && (data[++index] == 'h')
2724                 && (data[++index] == 'o'))
2725               return TokenNameecho;
2726             else if ((data[index] == 'l') && (data[++index] == 's')
2727                 && (data[++index] == 'e'))
2728               return TokenNameelse;
2729             else if ((data[index] == 'x') && (data[++index] == 'i')
2730                 && (data[++index] == 't'))
2731               return TokenNameexit;
2732             else if ((data[index] == 'v') && (data[++index] == 'a')
2733                 && (data[++index] == 'l'))
2734               return TokenNameeval;
2735             else
2736               return TokenNameIdentifier;
2737           case 5 :
2738             // endif empty
2739             if ((data[++index] == 'n') && (data[++index] == 'd')
2740                 && (data[++index] == 'i') && (data[++index] == 'f'))
2741               return TokenNameendif;
2742             if ((data[index] == 'm') && (data[++index] == 'p')
2743                 && (data[++index] == 't') && (data[++index] == 'y'))
2744               return TokenNameempty;
2745             else
2746               return TokenNameIdentifier;
2747           case 6 :
2748             // endfor
2749             if ((data[++index] == 'n') && (data[++index] == 'd')
2750                 && (data[++index] == 'f') && (data[++index] == 'o')
2751                 && (data[++index] == 'r'))
2752               return TokenNameendfor;
2753             else if ((data[index] == 'l') && (data[++index] == 's')
2754                 && (data[++index] == 'e') && (data[++index] == 'i')
2755                 && (data[++index] == 'f'))
2756               return TokenNameelseif;
2757             else
2758               return TokenNameIdentifier;
2759           case 7 :
2760             if ((data[++index] == 'x') && (data[++index] == 't')
2761                 && (data[++index] == 'e') && (data[++index] == 'n')
2762                 && (data[++index] == 'd') && (data[++index] == 's'))
2763               return TokenNameextends;
2764             else
2765               return TokenNameIdentifier;
2766           case 8 :
2767             // endwhile
2768             if ((data[++index] == 'n') && (data[++index] == 'd')
2769                 && (data[++index] == 'w') && (data[++index] == 'h')
2770                 && (data[++index] == 'i') && (data[++index] == 'l')
2771                 && (data[++index] == 'e'))
2772               return TokenNameendwhile;
2773             else
2774               return TokenNameIdentifier;
2775           case 9 :
2776             // endswitch
2777             if ((data[++index] == 'n') && (data[++index] == 'd')
2778                 && (data[++index] == 's') && (data[++index] == 'w')
2779                 && (data[++index] == 'i') && (data[++index] == 't')
2780                 && (data[++index] == 'c') && (data[++index] == 'h'))
2781               return TokenNameendswitch;
2782             else
2783               return TokenNameIdentifier;
2784           case 10 :
2785             // enddeclare
2786             if ((data[++index] == 'n') && (data[++index] == 'd')
2787                 && (data[++index] == 'd') && (data[++index] == 'e')
2788                 && (data[++index] == 'c') && (data[++index] == 'l')
2789                 && (data[++index] == 'a') && (data[++index] == 'r')
2790                 && (data[++index] == 'e'))
2791               return TokenNameendforeach;
2792             index = 0;
2793             if ((data[++index] == 'n') // endforeach
2794                 && (data[++index] == 'd') && (data[++index] == 'f')
2795                 && (data[++index] == 'o') && (data[++index] == 'r')
2796                 && (data[++index] == 'e') && (data[++index] == 'a')
2797                 && (data[++index] == 'c') && (data[++index] == 'h'))
2798               return TokenNameendforeach;
2799             else
2800               return TokenNameIdentifier;
2801           default :
2802             return TokenNameIdentifier;
2803         }
2804       case 'f' :
2805         //for false final function
2806         switch (length) {
2807           case 3 :
2808             if ((data[++index] == 'o') && (data[++index] == 'r'))
2809               return TokenNamefor;
2810             else
2811               return TokenNameIdentifier;
2812           case 5 :
2813             //            if ((data[++index] == 'a') && (data[++index] == 'l')
2814             //                && (data[++index] == 's') && (data[++index] == 'e'))
2815             //              return TokenNamefalse;
2816             if ((data[++index] == 'i') && (data[++index] == 'n')
2817                 && (data[++index] == 'a') && (data[++index] == 'l'))
2818               return TokenNamefinal;
2819             else
2820               return TokenNameIdentifier;
2821           case 7 :
2822             // foreach
2823             if ((data[++index] == 'o') && (data[++index] == 'r')
2824                 && (data[++index] == 'e') && (data[++index] == 'a')
2825                 && (data[++index] == 'c') && (data[++index] == 'h'))
2826               return TokenNameforeach;
2827             else
2828               return TokenNameIdentifier;
2829           case 8 :
2830             // function
2831             if ((data[++index] == 'u') && (data[++index] == 'n')
2832                 && (data[++index] == 'c') && (data[++index] == 't')
2833                 && (data[++index] == 'i') && (data[++index] == 'o')
2834                 && (data[++index] == 'n'))
2835               return TokenNamefunction;
2836             else
2837               return TokenNameIdentifier;
2838           default :
2839             return TokenNameIdentifier;
2840         }
2841       case 'g' :
2842         //global
2843         if (length == 6) {
2844           if ((data[++index] == 'l') && (data[++index] == 'o')
2845               && (data[++index] == 'b') && (data[++index] == 'a')
2846               && (data[++index] == 'l')) {
2847             return TokenNameglobal;
2848           }
2849         }
2850         return TokenNameIdentifier;
2851       case 'i' :
2852         //if int isset include include_once instanceof interface implements
2853         switch (length) {
2854           case 2 :
2855             if (data[++index] == 'f')
2856               return TokenNameif;
2857             else
2858               return TokenNameIdentifier;
2859           //          case 3 :
2860           //            if ((data[++index] == 'n') && (data[++index] == 't'))
2861           //              return TokenNameint;
2862           //            else
2863           //              return TokenNameIdentifier;
2864           case 5 :
2865             if ((data[++index] == 's') && (data[++index] == 's')
2866                 && (data[++index] == 'e') && (data[++index] == 't'))
2867               return TokenNameisset;
2868             else
2869               return TokenNameIdentifier;
2870           case 7 :
2871             if ((data[++index] == 'n') && (data[++index] == 'c')
2872                 && (data[++index] == 'l') && (data[++index] == 'u')
2873                 && (data[++index] == 'd') && (data[++index] == 'e'))
2874               return TokenNameinclude;
2875             else
2876               return TokenNameIdentifier;
2877           case 9 :
2878             // interface
2879             if ((data[++index] == 'n') && (data[++index] == 't')
2880                 && (data[++index] == 'e') && (data[++index] == 'r')
2881                 && (data[++index] == 'f') && (data[++index] == 'a')
2882                 && (data[++index] == 'c') && (data[++index] == 'e'))
2883               return TokenNameinterface;
2884             else
2885               return TokenNameIdentifier;
2886           case 10 :
2887             // instanceof
2888             if ((data[++index] == 'n') && (data[++index] == 's')
2889                 && (data[++index] == 't') && (data[++index] == 'a')
2890                 && (data[++index] == 'n') && (data[++index] == 'c')
2891                 && (data[++index] == 'e') && (data[++index] == 'o')
2892                 && (data[++index] == 'f'))
2893               return TokenNameinstanceof;
2894             if ((data[index] == 'm') && (data[++index] == 'p')
2895                 && (data[++index] == 'l') && (data[++index] == 'e')
2896                 && (data[++index] == 'm') && (data[++index] == 'e')
2897                 && (data[++index] == 'n') && (data[++index] == 't')
2898                 && (data[++index] == 's'))
2899               return TokenNameimplements;
2900             else
2901               return TokenNameIdentifier;
2902           case 12 :
2903             if ((data[++index] == 'n') && (data[++index] == 'c')
2904                 && (data[++index] == 'l') && (data[++index] == 'u')
2905                 && (data[++index] == 'd') && (data[++index] == 'e')
2906                 && (data[++index] == '_') && (data[++index] == 'o')
2907                 && (data[++index] == 'n') && (data[++index] == 'c')
2908                 && (data[++index] == 'e'))
2909               return TokenNameinclude_once;
2910             else
2911               return TokenNameIdentifier;
2912           default :
2913             return TokenNameIdentifier;
2914         }
2915       case 'l' :
2916         //list
2917         if (length == 4) {
2918           if ((data[++index] == 'i') && (data[++index] == 's')
2919               && (data[++index] == 't')) {
2920             return TokenNamelist;
2921           }
2922         }
2923         return TokenNameIdentifier;
2924       case 'n' :
2925         // new null
2926         switch (length) {
2927           case 3 :
2928             if ((data[++index] == 'e') && (data[++index] == 'w'))
2929               return TokenNamenew;
2930             else
2931               return TokenNameIdentifier;
2932           //          case 4 :
2933           //            if ((data[++index] == 'u') && (data[++index] == 'l')
2934           //                && (data[++index] == 'l'))
2935           //              return TokenNamenull;
2936           //            else
2937           //              return TokenNameIdentifier;
2938           default :
2939             return TokenNameIdentifier;
2940         }
2941       case 'o' :
2942         // or old_function
2943         if (length == 2) {
2944           if (data[++index] == 'r') {
2945             return TokenNameor;
2946           }
2947         }
2948         //        if (length == 12) {
2949         //          if ((data[++index] == 'l')
2950         //            && (data[++index] == 'd')
2951         //            && (data[++index] == '_')
2952         //            && (data[++index] == 'f')
2953         //            && (data[++index] == 'u')
2954         //            && (data[++index] == 'n')
2955         //            && (data[++index] == 'c')
2956         //            && (data[++index] == 't')
2957         //            && (data[++index] == 'i')
2958         //            && (data[++index] == 'o')
2959         //            && (data[++index] == 'n')) {
2960         //            return TokenNameold_function;
2961         //          }
2962         //        }
2963         return TokenNameIdentifier;
2964       case 'p' :
2965         // print public private protected
2966         switch (length) {
2967           case 5 :
2968             if ((data[++index] == 'r') && (data[++index] == 'i')
2969                 && (data[++index] == 'n') && (data[++index] == 't')) {
2970               return TokenNameprint;
2971             } else
2972               return TokenNameIdentifier;
2973           case 6 :
2974             if ((data[++index] == 'u') && (data[++index] == 'b')
2975                 && (data[++index] == 'l') && (data[++index] == 'i')
2976                 && (data[++index] == 'c')) {
2977               return TokenNamepublic;
2978             } else
2979               return TokenNameIdentifier;
2980           case 7 :
2981             if ((data[++index] == 'r') && (data[++index] == 'i')
2982                 && (data[++index] == 'v') && (data[++index] == 'a')
2983                 && (data[++index] == 't') && (data[++index] == 'e')) {
2984               return TokenNameprivate;
2985             } else
2986               return TokenNameIdentifier;
2987           case 9 :
2988             if ((data[++index] == 'r') && (data[++index] == 'o')
2989                 && (data[++index] == 't') && (data[++index] == 'e')
2990                 && (data[++index] == 'c') && (data[++index] == 't')
2991                 && (data[++index] == 'e') && (data[++index] == 'd')) {
2992               return TokenNameprotected;
2993             } else
2994               return TokenNameIdentifier;
2995         }
2996         return TokenNameIdentifier;
2997       case 'r' :
2998         //return require require_once
2999         if (length == 6) {
3000           if ((data[++index] == 'e') && (data[++index] == 't')
3001               && (data[++index] == 'u') && (data[++index] == 'r')
3002               && (data[++index] == 'n')) {
3003             return TokenNamereturn;
3004           }
3005         } else if (length == 7) {
3006           if ((data[++index] == 'e') && (data[++index] == 'q')
3007               && (data[++index] == 'u') && (data[++index] == 'i')
3008               && (data[++index] == 'r') && (data[++index] == 'e')) {
3009             return TokenNamerequire;
3010           }
3011         } else if (length == 12) {
3012           if ((data[++index] == 'e') && (data[++index] == 'q')
3013               && (data[++index] == 'u') && (data[++index] == 'i')
3014               && (data[++index] == 'r') && (data[++index] == 'e')
3015               && (data[++index] == '_') && (data[++index] == 'o')
3016               && (data[++index] == 'n') && (data[++index] == 'c')
3017               && (data[++index] == 'e')) {
3018             return TokenNamerequire_once;
3019           }
3020         } else
3021           return TokenNameIdentifier;
3022       case 's' :
3023         //static switch
3024         switch (length) {
3025           case 6 :
3026             if (data[++index] == 't')
3027               if ((data[++index] == 'a') && (data[++index] == 't')
3028                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3029                 return TokenNamestatic;
3030               } else
3031                 return TokenNameIdentifier;
3032             else if ((data[index] == 'w') && (data[++index] == 'i')
3033                 && (data[++index] == 't') && (data[++index] == 'c')
3034                 && (data[++index] == 'h'))
3035               return TokenNameswitch;
3036             else
3037               return TokenNameIdentifier;
3038           default :
3039             return TokenNameIdentifier;
3040         }
3041       case 't' :
3042         // try true throw
3043         switch (length) {
3044           case 3 :
3045             if ((data[++index] == 'r') && (data[++index] == 'y'))
3046               return TokenNametry;
3047             else
3048               return TokenNameIdentifier;
3049           //          case 4 :
3050           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3051           //                && (data[++index] == 'e'))
3052           //              return TokenNametrue;
3053           //            else
3054           //              return TokenNameIdentifier;
3055           case 5 :
3056             if ((data[++index] == 'h') && (data[++index] == 'r')
3057                 && (data[++index] == 'o') && (data[++index] == 'w'))
3058               return TokenNamethrow;
3059             else
3060               return TokenNameIdentifier;
3061           default :
3062             return TokenNameIdentifier;
3063         }
3064       case 'u' :
3065         //use unset
3066         switch (length) {
3067           case 3 :
3068             if ((data[++index] == 's') && (data[++index] == 'e'))
3069               return TokenNameuse;
3070             else
3071               return TokenNameIdentifier;
3072           case 5 :
3073             if ((data[++index] == 'n') && (data[++index] == 's')
3074                 && (data[++index] == 'e') && (data[++index] == 't'))
3075               return TokenNameunset;
3076             else
3077               return TokenNameIdentifier;
3078           default :
3079             return TokenNameIdentifier;
3080         }
3081       case 'v' :
3082         //var
3083         switch (length) {
3084           case 3 :
3085             if ((data[++index] == 'a') && (data[++index] == 'r'))
3086               return TokenNamevar;
3087             else
3088               return TokenNameIdentifier;
3089           default :
3090             return TokenNameIdentifier;
3091         }
3092       case 'w' :
3093         //while
3094         switch (length) {
3095           case 5 :
3096             if ((data[++index] == 'h') && (data[++index] == 'i')
3097                 && (data[++index] == 'l') && (data[++index] == 'e'))
3098               return TokenNamewhile;
3099             else
3100               return TokenNameIdentifier;
3101           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3102           // (data[++index]=='e') && (data[++index]=='f')&&
3103           // (data[++index]=='p'))
3104           //return TokenNamewidefp ;
3105           //else
3106           //return TokenNameIdentifier;
3107           default :
3108             return TokenNameIdentifier;
3109         }
3110       case 'x' :
3111         //xor
3112         switch (length) {
3113           case 3 :
3114             if ((data[++index] == 'o') && (data[++index] == 'r'))
3115               return TokenNamexor;
3116             else
3117               return TokenNameIdentifier;
3118           default :
3119             return TokenNameIdentifier;
3120         }
3121       default :
3122         return TokenNameIdentifier;
3123     }
3124   }
3125   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3126     //when entering this method the currentCharacter is the firt
3127     //digit of the number , i.e. it may be preceeded by a . when
3128     //dotPrefix is true
3129     boolean floating = dotPrefix;
3130     if ((!dotPrefix) && (currentCharacter == '0')) {
3131       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3132         //force the first char of the hexa number do exist...
3133         // consume next character
3134         unicodeAsBackSlash = false;
3135         currentCharacter = source[currentPosition++];
3136         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3137         //          && (source[currentPosition] == 'u')) {
3138         //          getNextUnicodeChar();
3139         //        } else {
3140         //          if (withoutUnicodePtr != 0) {
3141         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3142         //          }
3143         //        }
3144         if (Character.digit(currentCharacter, 16) == -1)
3145           throw new InvalidInputException(INVALID_HEXA);
3146         //---end forcing--
3147         while (getNextCharAsDigit(16)) {
3148         };
3149         //        if (getNextChar('l', 'L') >= 0)
3150         //          return TokenNameLongLiteral;
3151         //        else
3152         return TokenNameIntegerLiteral;
3153       }
3154       //there is x or X in the number
3155       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3156       // 00078.0 is true !!!!! crazy language
3157       if (getNextCharAsDigit()) {
3158         //-------------potential octal-----------------
3159         while (getNextCharAsDigit()) {
3160         };
3161         //        if (getNextChar('l', 'L') >= 0) {
3162         //          return TokenNameLongLiteral;
3163         //        }
3164         //
3165         //        if (getNextChar('f', 'F') >= 0) {
3166         //          return TokenNameFloatingPointLiteral;
3167         //        }
3168         if (getNextChar('d', 'D') >= 0) {
3169           return TokenNameDoubleLiteral;
3170         } else { //make the distinction between octal and float ....
3171           if (getNextChar('.')) { //bingo ! ....
3172             while (getNextCharAsDigit()) {
3173             };
3174             if (getNextChar('e', 'E') >= 0) {
3175               // consume next character
3176               unicodeAsBackSlash = false;
3177               currentCharacter = source[currentPosition++];
3178               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3179               //                && (source[currentPosition] == 'u')) {
3180               //                getNextUnicodeChar();
3181               //              } else {
3182               //                if (withoutUnicodePtr != 0) {
3183               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3184               //                }
3185               //              }
3186               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3187                 // consume next character
3188                 unicodeAsBackSlash = false;
3189                 currentCharacter = source[currentPosition++];
3190                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3191                 //                  && (source[currentPosition] == 'u')) {
3192                 //                  getNextUnicodeChar();
3193                 //                } else {
3194                 //                  if (withoutUnicodePtr != 0) {
3195                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3196                 //                      currentCharacter;
3197                 //                  }
3198                 //                }
3199               }
3200               if (!Character.isDigit(currentCharacter))
3201                 throw new InvalidInputException(INVALID_FLOAT);
3202               while (getNextCharAsDigit()) {
3203               };
3204             }
3205             //            if (getNextChar('f', 'F') >= 0)
3206             //              return TokenNameFloatingPointLiteral;
3207             getNextChar('d', 'D'); //jump over potential d or D
3208             return TokenNameDoubleLiteral;
3209           } else {
3210             return TokenNameIntegerLiteral;
3211           }
3212         }
3213       } else {
3214         /* carry on */
3215       }
3216     }
3217     while (getNextCharAsDigit()) {
3218     };
3219     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3220     //      return TokenNameLongLiteral;
3221     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3222       while (getNextCharAsDigit()) {
3223       };
3224       floating = true;
3225     }
3226     //if floating is true both exponant and suffix may be optional
3227     if (getNextChar('e', 'E') >= 0) {
3228       floating = true;
3229       // consume next character
3230       unicodeAsBackSlash = false;
3231       currentCharacter = source[currentPosition++];
3232       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3233       //        && (source[currentPosition] == 'u')) {
3234       //        getNextUnicodeChar();
3235       //      } else {
3236       //        if (withoutUnicodePtr != 0) {
3237       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3238       //        }
3239       //      }
3240       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3241         // next
3242         // character
3243         unicodeAsBackSlash = false;
3244         currentCharacter = source[currentPosition++];
3245         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3246         //          && (source[currentPosition] == 'u')) {
3247         //          getNextUnicodeChar();
3248         //        } else {
3249         //          if (withoutUnicodePtr != 0) {
3250         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3251         //          }
3252         //        }
3253       }
3254       if (!Character.isDigit(currentCharacter))
3255         throw new InvalidInputException(INVALID_FLOAT);
3256       while (getNextCharAsDigit()) {
3257       };
3258     }
3259     if (getNextChar('d', 'D') >= 0)
3260       return TokenNameDoubleLiteral;
3261     //    if (getNextChar('f', 'F') >= 0)
3262     //      return TokenNameFloatingPointLiteral;
3263     //the long flag has been tested before
3264     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3265   }
3266   /**
3267    * Search the line number corresponding to a specific position
3268    *
3269    */
3270   public final int getLineNumber(int position) {
3271     if (lineEnds == null)
3272       return 1;
3273     int length = linePtr + 1;
3274     if (length == 0)
3275       return 1;
3276     int g = 0, d = length - 1;
3277     int m = 0;
3278     while (g <= d) {
3279       m = (g + d) / 2;
3280       if (position < lineEnds[m]) {
3281         d = m - 1;
3282       } else if (position > lineEnds[m]) {
3283         g = m + 1;
3284       } else {
3285         return m + 1;
3286       }
3287     }
3288     if (position < lineEnds[m]) {
3289       return m + 1;
3290     }
3291     return m + 2;
3292   }
3293   public void setPHPMode(boolean mode) {
3294     phpMode = mode;
3295   }
3296   public final void setSource(char[] source) {
3297     //the source-buffer is set to sourceString
3298     if (source == null) {
3299       this.source = new char[0];
3300     } else {
3301       this.source = source;
3302     }
3303     startPosition = -1;
3304     initialPosition = currentPosition = 0;
3305     containsAssertKeyword = false;
3306     withoutUnicodeBuffer = new char[this.source.length];
3307     encapsedStringStack = new Stack();
3308   }
3309   public String toString() {
3310     if (startPosition == source.length)
3311       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3312     if (currentPosition > source.length)
3313       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3314     char front[] = new char[startPosition];
3315     System.arraycopy(source, 0, front, 0, startPosition);
3316     int middleLength = (currentPosition - 1) - startPosition + 1;
3317     char middle[];
3318     if (middleLength > -1) {
3319       middle = new char[middleLength];
3320       System.arraycopy(source, startPosition, middle, 0, middleLength);
3321     } else {
3322       middle = new char[0];
3323     }
3324     char end[] = new char[source.length - (currentPosition - 1)];
3325     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3326         - (currentPosition - 1) - 1);
3327     return new String(front)
3328         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3329         + new String(middle)
3330         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3331         + new String(end);
3332   }
3333   public final String toStringAction(int act) {
3334     switch (act) {
3335       case TokenNameERROR :
3336         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3337       // //$NON-NLS-1$
3338       case TokenNameINLINE_HTML :
3339         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3340       case TokenNameIdentifier :
3341         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3342       case TokenNameVariable :
3343         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3344       case TokenNameabstract :
3345         return "abstract"; //$NON-NLS-1$
3346       case TokenNameand :
3347         return "AND"; //$NON-NLS-1$
3348       case TokenNamearray :
3349         return "array"; //$NON-NLS-1$
3350       case TokenNameas :
3351         return "as"; //$NON-NLS-1$
3352       case TokenNamebreak :
3353         return "break"; //$NON-NLS-1$
3354       case TokenNamecase :
3355         return "case"; //$NON-NLS-1$
3356       case TokenNameclass :
3357         return "class"; //$NON-NLS-1$
3358       case TokenNamecatch :
3359         return "catch"; //$NON-NLS-1$
3360       case TokenNameclone :
3361         //$NON-NLS-1$
3362         return "clone";
3363       case TokenNameconst :
3364         //$NON-NLS-1$
3365         return "const";
3366       case TokenNamecontinue :
3367         return "continue"; //$NON-NLS-1$
3368       case TokenNamedefault :
3369         return "default"; //$NON-NLS-1$
3370       //      case TokenNamedefine :
3371       //        return "define"; //$NON-NLS-1$
3372       case TokenNamedo :
3373         return "do"; //$NON-NLS-1$
3374       case TokenNameecho :
3375         return "echo"; //$NON-NLS-1$
3376       case TokenNameelse :
3377         return "else"; //$NON-NLS-1$
3378       case TokenNameelseif :
3379         return "elseif"; //$NON-NLS-1$
3380       case TokenNameendfor :
3381         return "endfor"; //$NON-NLS-1$
3382       case TokenNameendforeach :
3383         return "endforeach"; //$NON-NLS-1$
3384       case TokenNameendif :
3385         return "endif"; //$NON-NLS-1$
3386       case TokenNameendswitch :
3387         return "endswitch"; //$NON-NLS-1$
3388       case TokenNameendwhile :
3389         return "endwhile"; //$NON-NLS-1$
3390       case TokenNameextends :
3391         return "extends"; //$NON-NLS-1$
3392       //      case TokenNamefalse :
3393       //        return "false"; //$NON-NLS-1$
3394       case TokenNamefinal :
3395         return "final"; //$NON-NLS-1$
3396       case TokenNamefor :
3397         return "for"; //$NON-NLS-1$
3398       case TokenNameforeach :
3399         return "foreach"; //$NON-NLS-1$
3400       case TokenNamefunction :
3401         return "function"; //$NON-NLS-1$
3402       case TokenNameglobal :
3403         return "global"; //$NON-NLS-1$
3404       case TokenNameif :
3405         return "if"; //$NON-NLS-1$
3406       case TokenNameimplements :
3407         return "implements"; //$NON-NLS-1$
3408       case TokenNameinclude :
3409         return "include"; //$NON-NLS-1$
3410       case TokenNameinclude_once :
3411         return "include_once"; //$NON-NLS-1$
3412       case TokenNameinstanceof :
3413         return "instanceof"; //$NON-NLS-1$
3414       case TokenNameinterface :
3415         return "interface"; //$NON-NLS-1$
3416       case TokenNameisset :
3417         return "isset"; //$NON-NLS-1$
3418       case TokenNamelist :
3419         return "list"; //$NON-NLS-1$
3420       case TokenNamenew :
3421         return "new"; //$NON-NLS-1$
3422       //      case TokenNamenull :
3423       //        return "null"; //$NON-NLS-1$
3424       case TokenNameor :
3425         return "OR"; //$NON-NLS-1$
3426       case TokenNameprint :
3427         return "print"; //$NON-NLS-1$
3428       case TokenNameprivate :
3429         return "private"; //$NON-NLS-1$
3430       case TokenNameprotected :
3431         return "protected"; //$NON-NLS-1$
3432       case TokenNamepublic :
3433         return "public"; //$NON-NLS-1$
3434       case TokenNamerequire :
3435         return "require"; //$NON-NLS-1$
3436       case TokenNamerequire_once :
3437         return "require_once"; //$NON-NLS-1$
3438       case TokenNamereturn :
3439         return "return"; //$NON-NLS-1$
3440       case TokenNamestatic :
3441         return "static"; //$NON-NLS-1$
3442       case TokenNameswitch :
3443         return "switch"; //$NON-NLS-1$
3444       //      case TokenNametrue :
3445       //        return "true"; //$NON-NLS-1$
3446       case TokenNameunset :
3447         return "unset"; //$NON-NLS-1$
3448       case TokenNamevar :
3449         return "var"; //$NON-NLS-1$
3450       case TokenNamewhile :
3451         return "while"; //$NON-NLS-1$
3452       case TokenNamexor :
3453         return "XOR"; //$NON-NLS-1$
3454       //      case TokenNamethis :
3455       //        return "$this"; //$NON-NLS-1$
3456       case TokenNameIntegerLiteral :
3457         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3458       case TokenNameDoubleLiteral :
3459         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3460       case TokenNameStringLiteral :
3461         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3462       case TokenNameStringConstant :
3463         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3464       case TokenNameStringInterpolated :
3465         return "StringInterpolated(" + new String(getCurrentTokenSource())
3466             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3467       case TokenNameEncapsedString0 :
3468         return "`"; //$NON-NLS-1$
3469       case TokenNameEncapsedString1 :
3470         return "\'"; //$NON-NLS-1$
3471       case TokenNameEncapsedString2 :
3472         return "\""; //$NON-NLS-1$
3473       case TokenNameSTRING :
3474         return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475       case TokenNameHEREDOC :
3476         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3477       case TokenNamePLUS_PLUS :
3478         return "++"; //$NON-NLS-1$
3479       case TokenNameMINUS_MINUS :
3480         return "--"; //$NON-NLS-1$
3481       case TokenNameEQUAL_EQUAL :
3482         return "=="; //$NON-NLS-1$
3483       case TokenNameEQUAL_EQUAL_EQUAL :
3484         return "==="; //$NON-NLS-1$
3485       case TokenNameEQUAL_GREATER :
3486         return "=>"; //$NON-NLS-1$
3487       case TokenNameLESS_EQUAL :
3488         return "<="; //$NON-NLS-1$
3489       case TokenNameGREATER_EQUAL :
3490         return ">="; //$NON-NLS-1$
3491       case TokenNameNOT_EQUAL :
3492         return "!="; //$NON-NLS-1$
3493       case TokenNameNOT_EQUAL_EQUAL :
3494         return "!=="; //$NON-NLS-1$
3495       case TokenNameLEFT_SHIFT :
3496         return "<<"; //$NON-NLS-1$
3497       case TokenNameRIGHT_SHIFT :
3498         return ">>"; //$NON-NLS-1$
3499       case TokenNamePLUS_EQUAL :
3500         return "+="; //$NON-NLS-1$
3501       case TokenNameMINUS_EQUAL :
3502         return "-="; //$NON-NLS-1$
3503       case TokenNameMULTIPLY_EQUAL :
3504         return "*="; //$NON-NLS-1$
3505       case TokenNameDIVIDE_EQUAL :
3506         return "/="; //$NON-NLS-1$
3507       case TokenNameAND_EQUAL :
3508         return "&="; //$NON-NLS-1$
3509       case TokenNameOR_EQUAL :
3510         return "|="; //$NON-NLS-1$
3511       case TokenNameXOR_EQUAL :
3512         return "^="; //$NON-NLS-1$
3513       case TokenNameREMAINDER_EQUAL :
3514         return "%="; //$NON-NLS-1$
3515       case TokenNameDOT_EQUAL :
3516         return ".="; //$NON-NLS-1$
3517       case TokenNameLEFT_SHIFT_EQUAL :
3518         return "<<="; //$NON-NLS-1$
3519       case TokenNameRIGHT_SHIFT_EQUAL :
3520         return ">>="; //$NON-NLS-1$
3521       case TokenNameOR_OR :
3522         return "||"; //$NON-NLS-1$
3523       case TokenNameAND_AND :
3524         return "&&"; //$NON-NLS-1$
3525       case TokenNamePLUS :
3526         return "+"; //$NON-NLS-1$
3527       case TokenNameMINUS :
3528         return "-"; //$NON-NLS-1$
3529       case TokenNameMINUS_GREATER :
3530         return "->";
3531       case TokenNameNOT :
3532         return "!"; //$NON-NLS-1$
3533       case TokenNameREMAINDER :
3534         return "%"; //$NON-NLS-1$
3535       case TokenNameXOR :
3536         return "^"; //$NON-NLS-1$
3537       case TokenNameAND :
3538         return "&"; //$NON-NLS-1$
3539       case TokenNameMULTIPLY :
3540         return "*"; //$NON-NLS-1$
3541       case TokenNameOR :
3542         return "|"; //$NON-NLS-1$
3543       case TokenNameTWIDDLE :
3544         return "~"; //$NON-NLS-1$
3545       case TokenNameTWIDDLE_EQUAL :
3546         return "~="; //$NON-NLS-1$
3547       case TokenNameDIVIDE :
3548         return "/"; //$NON-NLS-1$
3549       case TokenNameGREATER :
3550         return ">"; //$NON-NLS-1$
3551       case TokenNameLESS :
3552         return "<"; //$NON-NLS-1$
3553       case TokenNameLPAREN :
3554         return "("; //$NON-NLS-1$
3555       case TokenNameRPAREN :
3556         return ")"; //$NON-NLS-1$
3557       case TokenNameLBRACE :
3558         return "{"; //$NON-NLS-1$
3559       case TokenNameRBRACE :
3560         return "}"; //$NON-NLS-1$
3561       case TokenNameLBRACKET :
3562         return "["; //$NON-NLS-1$
3563       case TokenNameRBRACKET :
3564         return "]"; //$NON-NLS-1$
3565       case TokenNameSEMICOLON :
3566         return ";"; //$NON-NLS-1$
3567       case TokenNameQUESTION :
3568         return "?"; //$NON-NLS-1$
3569       case TokenNameCOLON :
3570         return ":"; //$NON-NLS-1$
3571       case TokenNameCOMMA :
3572         return ","; //$NON-NLS-1$
3573       case TokenNameDOT :
3574         return "."; //$NON-NLS-1$
3575       case TokenNameEQUAL :
3576         return "="; //$NON-NLS-1$
3577       case TokenNameAT :
3578         return "@";
3579       case TokenNameDOLLAR :
3580         return "$";
3581       case TokenNameDOLLAR_LBRACE :
3582         return "${";
3583       case TokenNameEOF :
3584         return "EOF"; //$NON-NLS-1$
3585       case TokenNameWHITESPACE :
3586         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3587       case TokenNameCOMMENT_LINE :
3588         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3589       case TokenNameCOMMENT_BLOCK :
3590         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3591       case TokenNameCOMMENT_PHPDOC :
3592         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3593       //      case TokenNameHTML :
3594       //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3595       // //$NON-NLS-1$
3596       case TokenNameFILE :
3597         return "__FILE__"; //$NON-NLS-1$
3598       case TokenNameLINE :
3599         return "__LINE__"; //$NON-NLS-1$
3600       case TokenNameCLASS_C :
3601         return "__CLASS__"; //$NON-NLS-1$
3602       case TokenNameMETHOD_C :
3603         return "__METHOD__"; //$NON-NLS-1$
3604       case TokenNameFUNC_C :
3605         return "__FUNCTION__"; //$NON-NLS-1
3606       case TokenNameboolCAST :
3607         return "( bool )"; //$NON-NLS-1$
3608       case TokenNameintCAST :
3609         return "( int )"; //$NON-NLS-1$
3610       case TokenNamedoubleCAST :
3611         return "( double )"; //$NON-NLS-1$
3612       case TokenNameobjectCAST :
3613         return "( object )"; //$NON-NLS-1$
3614       case TokenNamestringCAST :
3615         return "( string )"; //$NON-NLS-1$
3616       default :
3617         return "not-a-token(" + (new Integer(act)) + ") "
3618             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3619     }
3620   }
3621   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3622       boolean checkNonExternalizedStringLiterals) {
3623     this(tokenizeComments, tokenizeWhiteSpace,
3624         checkNonExternalizedStringLiterals, false);
3625   }
3626   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3627       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3628     this.eofPosition = Integer.MAX_VALUE;
3629     this.tokenizeComments = tokenizeComments;
3630     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3631     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3632     this.assertMode = assertMode;
3633     this.encapsedStringStack = null;
3634   }
3635   private void checkNonExternalizeString() throws InvalidInputException {
3636     if (currentLine == null)
3637       return;
3638     parseTags(currentLine);
3639   }
3640   private void parseTags(NLSLine line) throws InvalidInputException {
3641     String s = new String(getCurrentTokenSource());
3642     int pos = s.indexOf(TAG_PREFIX);
3643     int lineLength = line.size();
3644     while (pos != -1) {
3645       int start = pos + TAG_PREFIX_LENGTH;
3646       int end = s.indexOf(TAG_POSTFIX, start);
3647       String index = s.substring(start, end);
3648       int i = 0;
3649       try {
3650         i = Integer.parseInt(index) - 1;
3651         // Tags are one based not zero based.
3652       } catch (NumberFormatException e) {
3653         i = -1; // we don't want to consider this as a valid NLS tag
3654       }
3655       if (line.exists(i)) {
3656         line.set(i, null);
3657       }
3658       pos = s.indexOf(TAG_PREFIX, start);
3659     }
3660     this.nonNLSStrings = new StringLiteral[lineLength];
3661     int nonNLSCounter = 0;
3662     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3663       StringLiteral literal = (StringLiteral) iterator.next();
3664       if (literal != null) {
3665         this.nonNLSStrings[nonNLSCounter++] = literal;
3666       }
3667     }
3668     if (nonNLSCounter == 0) {
3669       this.nonNLSStrings = null;
3670       currentLine = null;
3671       return;
3672     }
3673     this.wasNonExternalizedStringLiteral = true;
3674     if (nonNLSCounter != lineLength) {
3675       System.arraycopy(this.nonNLSStrings, 0,
3676           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3677           nonNLSCounter);
3678     }
3679     currentLine = null;
3680   }
3681   public final void scanEscapeCharacter() throws InvalidInputException {
3682     // the string with "\\u" is a legal string of two chars \ and u
3683     //thus we use a direct access to the source (for regular cases).
3684     if (unicodeAsBackSlash) {
3685       // consume next character
3686       unicodeAsBackSlash = false;
3687       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3688       // (source[currentPosition] == 'u')) {
3689       //                                getNextUnicodeChar();
3690       //                        } else {
3691       if (withoutUnicodePtr != 0) {
3692         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3693         //                              }
3694       }
3695     } else
3696       currentCharacter = source[currentPosition++];
3697     switch (currentCharacter) {
3698       case 'b' :
3699         currentCharacter = '\b';
3700         break;
3701       case 't' :
3702         currentCharacter = '\t';
3703         break;
3704       case 'n' :
3705         currentCharacter = '\n';
3706         break;
3707       case 'f' :
3708         currentCharacter = '\f';
3709         break;
3710       case 'r' :
3711         currentCharacter = '\r';
3712         break;
3713       case '\"' :
3714         currentCharacter = '\"';
3715         break;
3716       case '\'' :
3717         currentCharacter = '\'';
3718         break;
3719       case '\\' :
3720         currentCharacter = '\\';
3721         break;
3722       default :
3723         // -----------octal escape--------------
3724         // OctalDigit
3725         // OctalDigit OctalDigit
3726         // ZeroToThree OctalDigit OctalDigit
3727         int number = Character.getNumericValue(currentCharacter);
3728         if (number >= 0 && number <= 7) {
3729           boolean zeroToThreeNot = number > 3;
3730           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3731             int digit = Character.getNumericValue(currentCharacter);
3732             if (digit >= 0 && digit <= 7) {
3733               number = (number * 8) + digit;
3734               if (Character
3735                   .isDigit(currentCharacter = source[currentPosition++])) {
3736                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3737                   // Digit --> ignore last character
3738                   currentPosition--;
3739                 } else {
3740                   digit = Character.getNumericValue(currentCharacter);
3741                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3742                     // OctalDigit OctalDigit
3743                     number = (number * 8) + digit;
3744                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3745                     // --> ignore last character
3746                     currentPosition--;
3747                   }
3748                 }
3749               } else { // has read \OctalDigit NonDigit--> ignore last
3750                 // character
3751                 currentPosition--;
3752               }
3753             } else { // has read \OctalDigit NonOctalDigit--> ignore last
3754               // character
3755               currentPosition--;
3756             }
3757           } else { // has read \OctalDigit --> ignore last character
3758             currentPosition--;
3759           }
3760           if (number > 255)
3761             throw new InvalidInputException(INVALID_ESCAPE);
3762           currentCharacter = (char) number;
3763         } else
3764           throw new InvalidInputException(INVALID_ESCAPE);
3765     }
3766   }
  // check for the presence of task: tags
3768   public void checkTaskTag(int commentStart, int commentEnd) {
3769     // only look for newer task: tags
3770     if (this.foundTaskCount > 0
3771         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3772       return;
3773     }
3774     int foundTaskIndex = this.foundTaskCount;
3775     nextChar : for (int i = commentStart; i < commentEnd
3776         && i < this.eofPosition; i++) {
3777       char[] tag = null;
3778       char[] priority = null;
3779       // check for tag occurrence
3780       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3781         tag = this.taskTags[itag];
3782         priority = this.taskPriorities != null
3783             && itag < this.taskPriorities.length
3784             ? this.taskPriorities[itag]
3785             : null;
3786         int tagLength = tag.length;
3787         for (int t = 0; t < tagLength; t++) {
3788           if (this.source[i + t] != tag[t])
3789             continue nextTag;
3790         }
3791         if (this.foundTaskTags == null) {
3792           this.foundTaskTags = new char[5][];
3793           this.foundTaskMessages = new char[5][];
3794           this.foundTaskPriorities = new char[5][];
3795           this.foundTaskPositions = new int[5][];
3796         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3797           System.arraycopy(this.foundTaskTags, 0,
3798               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3799               this.foundTaskCount);
3800           System.arraycopy(this.foundTaskMessages, 0,
3801               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3802               this.foundTaskCount);
3803           System.arraycopy(this.foundTaskPriorities, 0,
3804               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3805               0, this.foundTaskCount);
3806           System.arraycopy(this.foundTaskPositions, 0,
3807               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3808               this.foundTaskCount);
3809         }
3810         this.foundTaskTags[this.foundTaskCount] = tag;
3811         this.foundTaskPriorities[this.foundTaskCount] = priority;
3812         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3813             i + tagLength - 1};
3814         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3815         this.foundTaskCount++;
3816         i += tagLength - 1; // will be incremented when looping
3817       }
3818     }
3819     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3820       // retrieve message start and end positions
3821       int msgStart = this.foundTaskPositions[i][0]
3822           + this.foundTaskTags[i].length;
3823       int max_value = i + 1 < this.foundTaskCount
3824           ? this.foundTaskPositions[i + 1][0] - 1
3825           : commentEnd - 1;
3826       // at most beginning of next task
3827       if (max_value < msgStart)
3828         max_value = msgStart; // would only occur if tag is before EOF.
3829       int end = -1;
3830       char c;
3831       for (int j = msgStart; j < max_value; j++) {
3832         if ((c = this.source[j]) == '\n' || c == '\r') {
3833           end = j - 1;
3834           break;
3835         }
3836       }
3837       if (end == -1) {
3838         for (int j = max_value; j > msgStart; j--) {
3839           if ((c = this.source[j]) == '*') {
3840             end = j - 1;
3841             break;
3842           }
3843         }
3844         if (end == -1)
3845           end = max_value;
3846       }
3847       if (msgStart == end)
3848         continue; // empty
3849       // trim the message
3850       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3851         end--;
3852       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3853         msgStart++;
3854       // update the end position of the task
3855       this.foundTaskPositions[i][1] = end;
3856       // get the message source
3857       final int messageLength = end - msgStart + 1;
3858       char[] message = new char[messageLength];
3859       System.arraycopy(source, msgStart, message, 0, messageLength);
3860       this.foundTaskMessages[i] = message;
3861     }
3862   }
3863 }