net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.CharOperation;
  18 import net.sourceforge.phpdt.core.compiler.IScanner;
  19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  22
  23 public class Scanner implements IScanner, ITerminalSymbols {
  24
  /*
   * The APIs are:
   * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been
   *   transformed into the correct char).
   * sourceStart gives the position in the stream; currentPosition-1 gives the sourceEnd position in the stream.
   */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
  50   //of a entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  //----------------optimized identifier management------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 120   static final int TableSize = 30, InternalTableSize = 6;
 121   //30*6 = 180 entries
 122   public static final int OptimizedLength = 6;
 123   public /* static */
 124   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 125   // support for detecting non-externalized string literals
 126   int currentLineNr = -1;
 127   int previousLineNr = -1;
 128   NLSLine currentLine = null;
 129   List lines = new ArrayList();
 130   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 131   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 132   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 133   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 134   public StringLiteral[] nonNLSStrings = null;
 135   public boolean checkNonExternalizedStringLiterals = true;
 136   public boolean wasNonExternalizedStringLiteral = false;
 137
 138   /* static */ {
 139     for (int i = 0; i < 6; i++) {
 140       for (int j = 0; j < TableSize; j++) {
 141         for (int k = 0; k < InternalTableSize; k++) {
 142           charArray_length[i][j][k] = initCharArray;
 143         }
 144       }
 145     }
 146   }
 147   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 148
 149   public static final int RoundBracket = 0;
 150   public static final int SquareBracket = 1;
 151   public static final int CurlyBracket = 2;
 152   public static final int BracketKinds = 3;
 153
 154   // task tag support
 155   public char[][] foundTaskTags = null;
 156   public char[][] foundTaskMessages;
 157   public char[][] foundTaskPriorities = null;
 158   public int[][] foundTaskPositions;
 159   public int foundTaskCount = 0;
 160   public char[][] taskTags = null;
 161   public char[][] taskPriorities = null;
 162
 163   public static final boolean DEBUG = false;
 164
  /**
   * Default constructor: delegates to Scanner(boolean, boolean) with both
   * tokenizeComments and tokenizeWhiteSpace set to false.
   */
  public Scanner() {
    this(false, false);
  }
  /**
   * Delegates to the three-argument constructor, forwarding both flags and
   * passing false as the third argument.
   * NOTE(review): the three-argument constructor is outside this view; the
   * meaning of its third parameter should be confirmed there.
   *
   * @param tokenizeComments forwarded unchanged as the first argument
   * @param tokenizeWhiteSpace forwarded unchanged as the second argument
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
  }
 171
 172   /**
 173    * Determines if the specified character is permissible as the first character in a PHP identifier
 174    */
 175   public static boolean isPHPIdentifierStart(char ch) {
 176     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 177   }
 178
 179   /**
 180    * Determines if the specified character may be part of a PHP identifier as other than the first character
 181    */
 182   public static boolean isPHPIdentifierPart(char ch) {
 183     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 184   }
 185
 186   public final boolean atEnd() {
 187     // This code is not relevant if source is
 188     // Only a part of the real stream input
 189
 190     return source.length == currentPosition;
 191   }
 192   public char[] getCurrentIdentifierSource() {
 193     //return the token REAL source (aka unicodes are precomputed)
 194
 195     char[] result;
 196     //    if (withoutUnicodePtr != 0)
 197     //      //0 is used as a fast test flag so the real first char is in position 1
 198     //      System.arraycopy(
 199     //        withoutUnicodeBuffer,
 200     //        1,
 201     //        result = new char[withoutUnicodePtr],
 202     //        0,
 203     //        withoutUnicodePtr);
 204     //    else {
 205     int length = currentPosition - startPosition;
 206     switch (length) { // see OptimizedLength
 207       case 1 :
 208         return optimizedCurrentTokenSource1();
 209       case 2 :
 210         return optimizedCurrentTokenSource2();
 211       case 3 :
 212         return optimizedCurrentTokenSource3();
 213       case 4 :
 214         return optimizedCurrentTokenSource4();
 215       case 5 :
 216         return optimizedCurrentTokenSource5();
 217       case 6 :
 218         return optimizedCurrentTokenSource6();
 219     }
 220     //no optimization
 221     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 222     //   }
 223     return result;
 224   }
 225   public int getCurrentTokenEndPosition() {
 226     return this.currentPosition - 1;
 227   }
 228
 229   public final char[] getCurrentTokenSource() {
 230     // Return the token REAL source (aka unicodes are precomputed)
 231
 232     char[] result;
 233     //    if (withoutUnicodePtr != 0)
 234     //      // 0 is used as a fast test flag so the real first char is in position 1
 235     //      System.arraycopy(
 236     //        withoutUnicodeBuffer,
 237     //        1,
 238     //        result = new char[withoutUnicodePtr],
 239     //        0,
 240     //        withoutUnicodePtr);
 241     //    else {
 242     int length;
 243     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 244     //    }
 245     return result;
 246   }
 247
 248   public final char[] getCurrentTokenSource(int startPos) {
 249     // Return the token REAL source (aka unicodes are precomputed)
 250
 251     char[] result;
 252     //    if (withoutUnicodePtr != 0)
 253     //      // 0 is used as a fast test flag so the real first char is in position 1
 254     //      System.arraycopy(
 255     //        withoutUnicodeBuffer,
 256     //        1,
 257     //        result = new char[withoutUnicodePtr],
 258     //        0,
 259     //        withoutUnicodePtr);
 260     //    else {
 261     int length;
 262     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 263     //  }
 264     return result;
 265   }
 266
 267   public final char[] getCurrentTokenSourceString() {
 268     //return the token REAL source (aka unicodes are precomputed).
 269     //REMOVE the two " that are at the beginning and the end.
 270
 271     char[] result;
 272     if (withoutUnicodePtr != 0)
 273       //0 is used as a fast test flag so the real first char is in position 1
 274       System.arraycopy(withoutUnicodeBuffer, 2,
 275       //2 is 1 (real start) + 1 (to jump over the ")
 276       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 277     else {
 278       int length;
 279       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 280     }
 281     return result;
 282   }
 283   public int getCurrentTokenStartPosition() {
 284     return this.startPosition;
 285   }
 286
 287   public final char[] getCurrentStringLiteralSource() {
 288     // Return the token REAL source (aka unicodes are precomputed)
 289
 290     char[] result;
 291
 292     int length;
 293     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 294     //    }
 295     return result;
 296   }
 297
 298   /*
 299    * Search the source position corresponding to the end of a given line number
 300    *
 301    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 302    *
 303    * In case the given line number is inconsistent, answers -1.
 304    */
 305   public final int getLineEnd(int lineNumber) {
 306
 307     if (lineEnds == null)
 308       return -1;
 309     if (lineNumber >= lineEnds.length)
 310       return -1;
 311     if (lineNumber <= 0)
 312       return -1;
 313
 314     if (lineNumber == lineEnds.length - 1)
 315       return eofPosition;
 316     return lineEnds[lineNumber - 1];
 317     // next line start one character behind the lineEnd of the previous line
 318   }
 319   /**
 320    * Search the source position corresponding to the beginning of a given line number
 321    *
 322    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 323    *
 324    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 325    *
 326    * In case the given line number is inconsistent, answers -1.
 327    */
 328   public final int getLineStart(int lineNumber) {
 329
 330     if (lineEnds == null)
 331       return -1;
 332     if (lineNumber >= lineEnds.length)
 333       return -1;
 334     if (lineNumber <= 0)
 335       return -1;
 336
 337     if (lineNumber == 1)
 338       return initialPosition;
 339     return lineEnds[lineNumber - 2] + 1;
 340     // next line start one character behind the lineEnd of the previous line
 341   }
 342   public final boolean getNextChar(char testedChar) {
 343     //BOOLEAN
 344     //handle the case of unicode.
 345     //when a unicode appears then we must use a buffer that holds char internal values
 346     //At the end of this method currentCharacter holds the new visited char
 347     //and currentPosition points right next after it
 348     //Both previous lines are true if the currentCharacter is == to the testedChar
 349     //On false, no side effect has occured.
 350
 351     //ALL getNextChar.... ARE OPTIMIZED COPIES
 352
 353     int temp = currentPosition;
 354     try {
 355       currentCharacter = source[currentPosition++];
 356       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 357       //        && (source[currentPosition] == 'u')) {
 358       //        //-------------unicode traitement ------------
 359       //        int c1, c2, c3, c4;
 360       //        int unicodeSize = 6;
 361       //        currentPosition++;
 362       //        while (source[currentPosition] == 'u') {
 363       //          currentPosition++;
 364       //          unicodeSize++;
 365       //        }
 366       //
 367       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 368       //          || c1 < 0)
 369       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 370       //            || c2 < 0)
 371       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 372       //            || c3 < 0)
 373       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 374       //            || c4 < 0)) {
 375       //          currentPosition = temp;
 376       //          return false;
 377       //        }
 378       //
 379       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 380       //        if (currentCharacter != testedChar) {
 381       //          currentPosition = temp;
 382       //          return false;
 383       //        }
 384       //        unicodeAsBackSlash = currentCharacter == '\\';
 385       //
 386       //        //need the unicode buffer
 387       //        if (withoutUnicodePtr == 0) {
 388       //          //buffer all the entries that have been left aside....
 389       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 390       //          System.arraycopy(
 391       //            source,
 392       //            startPosition,
 393       //            withoutUnicodeBuffer,
 394       //            1,
 395       //            withoutUnicodePtr);
 396       //        }
 397       //        //fill the buffer with the char
 398       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 399       //        return true;
 400       //
 401       //      } //-------------end unicode traitement--------------
 402       //      else {
 403       if (currentCharacter != testedChar) {
 404         currentPosition = temp;
 405         return false;
 406       }
 407       unicodeAsBackSlash = false;
 408       //        if (withoutUnicodePtr != 0)
 409       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 410       return true;
 411       //      }
 412     } catch (IndexOutOfBoundsException e) {
 413       unicodeAsBackSlash = false;
 414       currentPosition = temp;
 415       return false;
 416     }
 417   }
 418   public final int getNextChar(char testedChar1, char testedChar2) {
 419     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 420     //test can be done with (x==0) for the first and (x>0) for the second
 421     //handle the case of unicode.
 422     //when a unicode appears then we must use a buffer that holds char internal values
 423     //At the end of this method currentCharacter holds the new visited char
 424     //and currentPosition points right next after it
 425     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 426     //On false, no side effect has occured.
 427
 428     //ALL getNextChar.... ARE OPTIMIZED COPIES
 429
 430     int temp = currentPosition;
 431     try {
 432       int result;
 433       currentCharacter = source[currentPosition++];
 434       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 435       //        && (source[currentPosition] == 'u')) {
 436       //        //-------------unicode traitement ------------
 437       //        int c1, c2, c3, c4;
 438       //        int unicodeSize = 6;
 439       //        currentPosition++;
 440       //        while (source[currentPosition] == 'u') {
 441       //          currentPosition++;
 442       //          unicodeSize++;
 443       //        }
 444       //
 445       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 446       //          || c1 < 0)
 447       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 448       //            || c2 < 0)
 449       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 450       //            || c3 < 0)
 451       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 452       //            || c4 < 0)) {
 453       //          currentPosition = temp;
 454       //          return 2;
 455       //        }
 456       //
 457       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 458       //        if (currentCharacter == testedChar1)
 459       //          result = 0;
 460       //        else if (currentCharacter == testedChar2)
 461       //          result = 1;
 462       //        else {
 463       //          currentPosition = temp;
 464       //          return -1;
 465       //        }
 466       //
 467       //        //need the unicode buffer
 468       //        if (withoutUnicodePtr == 0) {
 469       //          //buffer all the entries that have been left aside....
 470       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 471       //          System.arraycopy(
 472       //            source,
 473       //            startPosition,
 474       //            withoutUnicodeBuffer,
 475       //            1,
 476       //            withoutUnicodePtr);
 477       //        }
 478       //        //fill the buffer with the char
 479       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 480       //        return result;
 481       //      } //-------------end unicode traitement--------------
 482       //      else {
 483       if (currentCharacter == testedChar1)
 484         result = 0;
 485       else if (currentCharacter == testedChar2)
 486         result = 1;
 487       else {
 488         currentPosition = temp;
 489         return -1;
 490       }
 491
 492       //        if (withoutUnicodePtr != 0)
 493       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 494       return result;
 495       //     }
 496     } catch (IndexOutOfBoundsException e) {
 497       currentPosition = temp;
 498       return -1;
 499     }
 500   }
 501   public final boolean getNextCharAsDigit() {
 502     //BOOLEAN
 503     //handle the case of unicode.
 504     //when a unicode appears then we must use a buffer that holds char internal values
 505     //At the end of this method currentCharacter holds the new visited char
 506     //and currentPosition points right next after it
 507     //Both previous lines are true if the currentCharacter is a digit
 508     //On false, no side effect has occured.
 509
 510     //ALL getNextChar.... ARE OPTIMIZED COPIES
 511
 512     int temp = currentPosition;
 513     try {
 514       currentCharacter = source[currentPosition++];
 515       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 516       //        && (source[currentPosition] == 'u')) {
 517       //        //-------------unicode traitement ------------
 518       //        int c1, c2, c3, c4;
 519       //        int unicodeSize = 6;
 520       //        currentPosition++;
 521       //        while (source[currentPosition] == 'u') {
 522       //          currentPosition++;
 523       //          unicodeSize++;
 524       //        }
 525       //
 526       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 527       //          || c1 < 0)
 528       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 529       //            || c2 < 0)
 530       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 531       //            || c3 < 0)
 532       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 533       //            || c4 < 0)) {
 534       //          currentPosition = temp;
 535       //          return false;
 536       //        }
 537       //
 538       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 539       //        if (!Character.isDigit(currentCharacter)) {
 540       //          currentPosition = temp;
 541       //          return false;
 542       //        }
 543       //
 544       //        //need the unicode buffer
 545       //        if (withoutUnicodePtr == 0) {
 546       //          //buffer all the entries that have been left aside....
 547       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 548       //          System.arraycopy(
 549       //            source,
 550       //            startPosition,
 551       //            withoutUnicodeBuffer,
 552       //            1,
 553       //            withoutUnicodePtr);
 554       //        }
 555       //        //fill the buffer with the char
 556       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 557       //        return true;
 558       //      } //-------------end unicode traitement--------------
 559       //      else {
 560       if (!Character.isDigit(currentCharacter)) {
 561         currentPosition = temp;
 562         return false;
 563       }
 564       //        if (withoutUnicodePtr != 0)
 565       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 566       return true;
 567       //      }
 568     } catch (IndexOutOfBoundsException e) {
 569       currentPosition = temp;
 570       return false;
 571     }
 572   }
 573   public final boolean getNextCharAsDigit(int radix) {
 574     //BOOLEAN
 575     //handle the case of unicode.
 576     //when a unicode appears then we must use a buffer that holds char internal values
 577     //At the end of this method currentCharacter holds the new visited char
 578     //and currentPosition points right next after it
 579     //Both previous lines are true if the currentCharacter is a digit base on radix
 580     //On false, no side effect has occured.
 581
 582     //ALL getNextChar.... ARE OPTIMIZED COPIES
 583
 584     int temp = currentPosition;
 585     try {
 586       currentCharacter = source[currentPosition++];
 587       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 588       //        && (source[currentPosition] == 'u')) {
 589       //        //-------------unicode traitement ------------
 590       //        int c1, c2, c3, c4;
 591       //        int unicodeSize = 6;
 592       //        currentPosition++;
 593       //        while (source[currentPosition] == 'u') {
 594       //          currentPosition++;
 595       //          unicodeSize++;
 596       //        }
 597       //
 598       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 599       //          || c1 < 0)
 600       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 601       //            || c2 < 0)
 602       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 603       //            || c3 < 0)
 604       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 605       //            || c4 < 0)) {
 606       //          currentPosition = temp;
 607       //          return false;
 608       //        }
 609       //
 610       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 611       //        if (Character.digit(currentCharacter, radix) == -1) {
 612       //          currentPosition = temp;
 613       //          return false;
 614       //        }
 615       //
 616       //        //need the unicode buffer
 617       //        if (withoutUnicodePtr == 0) {
 618       //          //buffer all the entries that have been left aside....
 619       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 620       //          System.arraycopy(
 621       //            source,
 622       //            startPosition,
 623       //            withoutUnicodeBuffer,
 624       //            1,
 625       //            withoutUnicodePtr);
 626       //        }
 627       //        //fill the buffer with the char
 628       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 629       //        return true;
 630       //      } //-------------end unicode traitement--------------
 631       //      else {
 632       if (Character.digit(currentCharacter, radix) == -1) {
 633         currentPosition = temp;
 634         return false;
 635       }
 636       //        if (withoutUnicodePtr != 0)
 637       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 638       return true;
 639       //      }
 640     } catch (IndexOutOfBoundsException e) {
 641       currentPosition = temp;
 642       return false;
 643     }
 644   }
 645   public boolean getNextCharAsJavaIdentifierPart() {
 646     //BOOLEAN
 647     //handle the case of unicode.
 648     //when a unicode appears then we must use a buffer that holds char internal values
 649     //At the end of this method currentCharacter holds the new visited char
 650     //and currentPosition points right next after it
 651     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 652     //On false, no side effect has occured.
 653
 654     //ALL getNextChar.... ARE OPTIMIZED COPIES
 655
 656     int temp = currentPosition;
 657     try {
 658       currentCharacter = source[currentPosition++];
 659       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 660       //        && (source[currentPosition] == 'u')) {
 661       //        //-------------unicode traitement ------------
 662       //        int c1, c2, c3, c4;
 663       //        int unicodeSize = 6;
 664       //        currentPosition++;
 665       //        while (source[currentPosition] == 'u') {
 666       //          currentPosition++;
 667       //          unicodeSize++;
 668       //        }
 669       //
 670       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 671       //          || c1 < 0)
 672       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 673       //            || c2 < 0)
 674       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 675       //            || c3 < 0)
 676       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 677       //            || c4 < 0)) {
 678       //          currentPosition = temp;
 679       //          return false;
 680       //        }
 681       //
 682       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 683       //        if (!isPHPIdentifierPart(currentCharacter)) {
 684       //          currentPosition = temp;
 685       //          return false;
 686       //        }
 687       //
 688       //        //need the unicode buffer
 689       //        if (withoutUnicodePtr == 0) {
 690       //          //buffer all the entries that have been left aside....
 691       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 692       //          System.arraycopy(
 693       //            source,
 694       //            startPosition,
 695       //            withoutUnicodeBuffer,
 696       //            1,
 697       //            withoutUnicodePtr);
 698       //        }
 699       //        //fill the buffer with the char
 700       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 701       //        return true;
 702       //      } //-------------end unicode traitement--------------
 703       //      else {
 704       if (!isPHPIdentifierPart(currentCharacter)) {
 705         currentPosition = temp;
 706         return false;
 707       }
 708
 709       //        if (withoutUnicodePtr != 0)
 710       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 711       return true;
 712       //      }
 713     } catch (IndexOutOfBoundsException e) {
 714       currentPosition = temp;
 715       return false;
 716     }
 717   }
 718
 719   public int getNextToken() throws InvalidInputException {
 720     int htmlPosition = currentPosition;
 721     try {
 722       while (!phpMode) {
 723         currentCharacter = source[currentPosition++];
 724         if (currentCharacter == '<') {
 725           if (getNextChar('?')) {
 726             currentCharacter = source[currentPosition++];
 727             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
 728               // <?
 729               startPosition = currentPosition;
 730               phpMode = true;
 731               if (tokenizeWhiteSpace) {
 732                 // && (whiteStart != currentPosition - 1)) {
 733                 // reposition scanner in case we are interested by spaces as tokens
 734                 startPosition = htmlPosition;
 735                 return TokenNameHTML;
 736               }
 737             } else {
 738               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
 739               if (phpStart) {
 740                 int test = getNextChar('H', 'h');
 741                 if (test >= 0) {
 742                   test = getNextChar('P', 'p');
 743                   if (test >= 0) {
 744                     // <?PHP <?php
 745                     startPosition = currentPosition;
 746                     phpMode = true;
 747
 748                     if (tokenizeWhiteSpace) {
 749                       // && (whiteStart != currentPosition - 1)) {
 750                       // reposition scanner in case we are interested by spaces as tokens
 751                       startPosition = htmlPosition;
 752                       return TokenNameHTML;
 753                     }
 754                   }
 755                 }
 756               }
 757             }
 758           }
 759         }
 760
 761         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 762           if (recordLineSeparator) {
 763             pushLineSeparator();
 764           } else {
 765             currentLine = null;
 766           }
 767         }
 768       }
 769     } //-----------------end switch while try--------------------
 770     catch (IndexOutOfBoundsException e) {
 771       if (tokenizeWhiteSpace) {
 772         // && (whiteStart != currentPosition - 1)) {
 773         // reposition scanner in case we are interested by spaces as tokens
 774         startPosition = htmlPosition;
 775       }
 776       return TokenNameEOF;
 777     }
 778
 779     if (phpMode) {
 780       this.wasAcr = false;
 781       if (diet) {
 782         jumpOverMethodBody();
 783         diet = false;
 784         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 785       }
 786       try {
 787         while (true) { //loop for jumping over comments
 788           withoutUnicodePtr = 0;
 789           //start with a new token (even comment written with unicode )
 790
 791           // ---------Consume white space and handles startPosition---------
 792           int whiteStart = currentPosition;
 793           boolean isWhiteSpace;
 794           do {
 795             startPosition = currentPosition;
 796             currentCharacter = source[currentPosition++];
 797             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 798             //              && (source[currentPosition] == 'u')) {
 799             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 800             //            } else {
 801             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 802               checkNonExternalizeString();
 803               if (recordLineSeparator) {
 804                 pushLineSeparator();
 805               } else {
 806                 currentLine = null;
 807               }
 808             }
 809             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 810             //            }
 811           } while (isWhiteSpace);
 812           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 813             // reposition scanner in case we are interested by spaces as tokens
 814             currentPosition--;
 815             startPosition = whiteStart;
 816             return TokenNameWHITESPACE;
 817           }
 818           //little trick to get out in the middle of a source computation
 819           if (currentPosition > eofPosition)
 820             return TokenNameEOF;
 821
 822           // ---------Identify the next token-------------
 823
 824           switch (currentCharacter) {
 825             case '(' :
 826               return TokenNameLPAREN;
 827             case ')' :
 828               return TokenNameRPAREN;
 829             case '{' :
 830               return TokenNameLBRACE;
 831             case '}' :
 832               return TokenNameRBRACE;
 833             case '[' :
 834               return TokenNameLBRACKET;
 835             case ']' :
 836               return TokenNameRBRACKET;
 837             case ';' :
 838               return TokenNameSEMICOLON;
 839             case ',' :
 840               return TokenNameCOMMA;
 841
 842             case '.' :
 843               if (getNextCharAsDigit())
 844                 return scanNumber(true);
 845               return TokenNameDOT;
 846             case '+' :
 847               {
 848                 int test;
 849                 if ((test = getNextChar('+', '=')) == 0)
 850                   return TokenNamePLUS_PLUS;
 851                 if (test > 0)
 852                   return TokenNamePLUS_EQUAL;
 853                 return TokenNamePLUS;
 854               }
 855             case '-' :
 856               {
 857                 int test;
 858                 if ((test = getNextChar('-', '=')) == 0)
 859                   return TokenNameMINUS_MINUS;
 860                 if (test > 0)
 861                   return TokenNameMINUS_EQUAL;
 862                 if (getNextChar('>'))
 863                   return TokenNameMINUS_GREATER;
 864
 865                 return TokenNameMINUS;
 866               }
 867             case '~' :
 868               if (getNextChar('='))
 869                 return TokenNameTWIDDLE_EQUAL;
 870               return TokenNameTWIDDLE;
 871             case '!' :
 872               if (getNextChar('=')) {
 873                 if (getNextChar('=')) {
 874                   return TokenNameNOT_EQUAL_EQUAL;
 875                 }
 876                 return TokenNameNOT_EQUAL;
 877               }
 878               return TokenNameNOT;
 879             case '*' :
 880               if (getNextChar('='))
 881                 return TokenNameMULTIPLY_EQUAL;
 882               return TokenNameMULTIPLY;
 883             case '%' :
 884               if (getNextChar('='))
 885                 return TokenNameREMAINDER_EQUAL;
 886               return TokenNameREMAINDER;
 887             case '<' :
 888               {
 889                 int test;
 890                 if ((test = getNextChar('=', '<')) == 0)
 891                   return TokenNameLESS_EQUAL;
 892                 if (test > 0) {
 893                   if (getNextChar('='))
 894                     return TokenNameLEFT_SHIFT_EQUAL;
 895                   if (getNextChar('<')) {
 896                     int heredocStart = currentPosition;
 897                     int heredocLength = 0;
 898                     currentCharacter = source[currentPosition++];
 899                     if (isPHPIdentifierStart(currentCharacter)) {
 900                       currentCharacter = source[currentPosition++];
 901                     } else {
 902                       return TokenNameERROR;
 903                     }
 904                     while (isPHPIdentifierPart(currentCharacter)) {
 905                       currentCharacter = source[currentPosition++];
 906                     }
 907
 908                     heredocLength = currentPosition - heredocStart - 1;
 909
 910                     // heredoc end-tag determination
 911                     boolean endTag = true;
 912                     char ch;
 913                     do {
 914                       ch = source[currentPosition++];
 915                       if (ch == '\r' || ch == '\n') {
 916                         if (recordLineSeparator) {
 917                           pushLineSeparator();
 918                         } else {
 919                           currentLine = null;
 920                         }
 921                         for (int i = 0; i < heredocLength; i++) {
 922                           if (source[currentPosition + i] != source[heredocStart + i]) {
 923                             endTag = false;
 924                             break;
 925                           }
 926                         }
 927                         if (endTag) {
 928                           currentPosition += heredocLength - 1;
 929                           currentCharacter = source[currentPosition++];
 930                           break; // do...while loop
 931                         } else {
 932                           endTag = true;
 933                         }
 934                       }
 935
 936                     } while (true);
 937
 938                     return TokenNameHEREDOC;
 939                   }
 940                   return TokenNameLEFT_SHIFT;
 941                 }
 942                 return TokenNameLESS;
 943               }
 944             case '>' :
 945               {
 946                 int test;
 947                 if ((test = getNextChar('=', '>')) == 0)
 948                   return TokenNameGREATER_EQUAL;
 949                 if (test > 0) {
 950                   if ((test = getNextChar('=', '>')) == 0)
 951                     return TokenNameRIGHT_SHIFT_EQUAL;
 952                   return TokenNameRIGHT_SHIFT;
 953                 }
 954                 return TokenNameGREATER;
 955               }
 956             case '=' :
 957               if (getNextChar('=')) {
 958                 if (getNextChar('=')) {
 959                   return TokenNameEQUAL_EQUAL_EQUAL;
 960                 }
 961                 return TokenNameEQUAL_EQUAL;
 962               }
 963               if (getNextChar('>'))
 964                 return TokenNameEQUAL_GREATER;
 965               return TokenNameEQUAL;
 966             case '&' :
 967               {
 968                 int test;
 969                 if ((test = getNextChar('&', '=')) == 0)
 970                   return TokenNameAND_AND;
 971                 if (test > 0)
 972                   return TokenNameAND_EQUAL;
 973                 return TokenNameAND;
 974               }
 975             case '|' :
 976               {
 977                 int test;
 978                 if ((test = getNextChar('|', '=')) == 0)
 979                   return TokenNameOR_OR;
 980                 if (test > 0)
 981                   return TokenNameOR_EQUAL;
 982                 return TokenNameOR;
 983               }
 984             case '^' :
 985               if (getNextChar('='))
 986                 return TokenNameXOR_EQUAL;
 987               return TokenNameXOR;
 988             case '?' :
 989               if (getNextChar('>')) {
 990                 phpMode = false;
 991                 return TokenNameStopPHP;
 992               }
 993               return TokenNameQUESTION;
 994             case ':' :
 995               if (getNextChar(':'))
 996                 return TokenNameCOLON_COLON;
 997               return TokenNameCOLON;
 998             case '@' :
 999               return TokenNameAT;
1000               //                                        case '\'' :
1001               //                                                {
1002               //                                                        int test;
1003               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1004               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1005               //                                                        }
1006               //                                                        if (test > 0) {
1007               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008               //                                                                for (int lookAhead = 0;
1009               //                                                                        lookAhead < 3;
1010               //                                                                        lookAhead++) {
1011               //                                                                        if (currentPosition + lookAhead
1012               //                                                                                == source.length)
1013               //                                                                                break;
1014               //                                                                        if (source[currentPosition + lookAhead]
1015               //                                                                                == '\n')
1016               //                                                                                break;
1017               //                                                                        if (source[currentPosition + lookAhead]
1018               //                                                                                == '\'') {
1019               //                                                                                currentPosition += lookAhead + 1;
1020               //                                                                                break;
1021               //                                                                        }
1022               //                                                                }
1023               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1024               //                                                        }
1025               //                                                }
1026               //                                                if (getNextChar('\'')) {
1027               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1028               //                                                        for (int lookAhead = 0;
1029               //                                                                lookAhead < 3;
1030               //                                                                lookAhead++) {
1031               //                                                                if (currentPosition + lookAhead
1032               //                                                                        == source.length)
1033               //                                                                        break;
1034               //                                                                if (source[currentPosition + lookAhead]
1035               //                                                                        == '\n')
1036               //                                                                        break;
1037               //                                                                if (source[currentPosition + lookAhead]
1038               //                                                                        == '\'') {
1039               //                                                                        currentPosition += lookAhead + 1;
1040               //                                                                        break;
1041               //                                                                }
1042               //                                                        }
1043               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1044               //                                                }
1045               //                                                if (getNextChar('\\'))
1046               //                                                        scanEscapeCharacter();
1047               //                                                else { // consume next character
1048               //                                                        unicodeAsBackSlash = false;
1049               //                                                        if (((currentCharacter = source[currentPosition++])
1050               //                                                                == '\\')
1051               //                                                                && (source[currentPosition] == 'u')) {
1052               //                                                                getNextUnicodeChar();
1053               //                                                        } else {
1054               //                                                                if (withoutUnicodePtr != 0) {
1055               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1056               //                                                                                currentCharacter;
1057               //                                                                }
1058               //                                                        }
1059               //                                                }
1060               //                                                // if (getNextChar('\''))
1061               //                                                // return TokenNameCharacterLiteral;
1062               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1063               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1064               //                                                        if (currentPosition + lookAhead == source.length)
1065               //                                                                break;
1066               //                                                        if (source[currentPosition + lookAhead] == '\n')
1067               //                                                                break;
1068               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1069               //                                                                currentPosition += lookAhead + 1;
1070               //                                                                break;
1071               //                                                        }
1072               //                                                }
1073               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1074             case '\'' :
1075               try {
1076                 // consume next character
1077                 unicodeAsBackSlash = false;
1078                 currentCharacter = source[currentPosition++];
1079                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1080                 //                  && (source[currentPosition] == 'u')) {
1081                 //                  getNextUnicodeChar();
1082                 //                } else {
1083                 //                  if (withoutUnicodePtr != 0) {
1084                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1085                 //                      currentCharacter;
1086                 //                  }
1087                 //                }
1088
1089                 while (currentCharacter != '\'') {
1090
1091                   /** ** in PHP \r and \n are valid in string literals *** */
1092                   //                  if ((currentCharacter == '\n')
1093                   //                    || (currentCharacter == '\r')) {
1094                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1095                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1096                   //                      if (currentPosition + lookAhead == source.length)
1097                   //                        break;
1098                   //                      if (source[currentPosition + lookAhead] == '\n')
1099                   //                        break;
1100                   //                      if (source[currentPosition + lookAhead] == '\"') {
1101                   //                        currentPosition += lookAhead + 1;
1102                   //                        break;
1103                   //                      }
1104                   //                    }
1105                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1106                   //                  }
1107                   if (currentCharacter == '\\') {
1108                     int escapeSize = currentPosition;
1109                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1110                     //the escape scan below may alter unicodeAsBackSlash, so its previous value is saved here for use a few lines down
1111                     scanSingleQuotedEscapeCharacter();
1112                     escapeSize = currentPosition - escapeSize;
1113                     if (withoutUnicodePtr == 0) {
1114                       //buffer all the entries that have been left aside....
1115                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1116                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1117                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1118                     } else { //overwrite the / in the buffer
1119                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1120                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1121                         withoutUnicodePtr--;
1122                       }
1123                     }
1124                   }
1125                   // consume next character
1126                   unicodeAsBackSlash = false;
1127                   currentCharacter = source[currentPosition++];
1128                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1129                   //                    && (source[currentPosition] == 'u')) {
1130                   //                    getNextUnicodeChar();
1131                   //                  } else {
1132                   if (withoutUnicodePtr != 0) {
1133                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1134                   }
1135                   //                  }
1136
1137                 }
1138               } catch (IndexOutOfBoundsException e) {
1139                 throw new InvalidInputException(UNTERMINATED_STRING);
1140               } catch (InvalidInputException e) {
1141                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1142                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1143                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1144                     if (currentPosition + lookAhead == source.length)
1145                       break;
1146                     if (source[currentPosition + lookAhead] == '\n')
1147                       break;
1148                     if (source[currentPosition + lookAhead] == '\'') {
1149                       currentPosition += lookAhead + 1;
1150                       break;
1151                     }
1152                   }
1153
1154                 }
1155                 throw e; // rethrow
1156               }
1157               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1158                 if (currentLine == null) {
1159                   currentLine = new NLSLine();
1160                   lines.add(currentLine);
1161                 }
1162                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1163               }
1164               return TokenNameStringConstant;
1165             case '"' :
1166               try {
1167                 // consume next character
1168                 unicodeAsBackSlash = false;
1169                 currentCharacter = source[currentPosition++];
1170                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1171                 //                  && (source[currentPosition] == 'u')) {
1172                 //                  getNextUnicodeChar();
1173                 //                } else {
1174                 //                  if (withoutUnicodePtr != 0) {
1175                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1176                 //                      currentCharacter;
1177                 //                  }
1178                 //                }
1179
1180                 while (currentCharacter != '"') {
1181
1182                   /** ** in PHP \r and \n are valid in string literals *** */
1183                   //                  if ((currentCharacter == '\n')
1184                   //                    || (currentCharacter == '\r')) {
1185                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1186                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1187                   //                      if (currentPosition + lookAhead == source.length)
1188                   //                        break;
1189                   //                      if (source[currentPosition + lookAhead] == '\n')
1190                   //                        break;
1191                   //                      if (source[currentPosition + lookAhead] == '\"') {
1192                   //                        currentPosition += lookAhead + 1;
1193                   //                        break;
1194                   //                      }
1195                   //                    }
1196                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1197                   //                  }
1198                   if (currentCharacter == '\\') {
1199                     int escapeSize = currentPosition;
1200                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1201                     //the escape scan below may alter unicodeAsBackSlash, so its previous value is saved here for use a few lines down
1202                     scanDoubleQuotedEscapeCharacter();
1203                     escapeSize = currentPosition - escapeSize;
1204                     if (withoutUnicodePtr == 0) {
1205                       //buffer all the entries that have been left aside....
1206                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1207                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1208                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1209                     } else { //overwrite the / in the buffer
1210                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1211                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1212                         withoutUnicodePtr--;
1213                       }
1214                     }
1215                   }
1216                   // consume next character
1217                   unicodeAsBackSlash = false;
1218                   currentCharacter = source[currentPosition++];
1219                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1220                   //                    && (source[currentPosition] == 'u')) {
1221                   //                    getNextUnicodeChar();
1222                   //                  } else {
1223                   if (withoutUnicodePtr != 0) {
1224                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1225                   }
1226                   //                  }
1227
1228                 }
1229               } catch (IndexOutOfBoundsException e) {
1230                 throw new InvalidInputException(UNTERMINATED_STRING);
1231               } catch (InvalidInputException e) {
1232                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1233                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1234                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1235                     if (currentPosition + lookAhead == source.length)
1236                       break;
1237                     if (source[currentPosition + lookAhead] == '\n')
1238                       break;
1239                     if (source[currentPosition + lookAhead] == '\"') {
1240                       currentPosition += lookAhead + 1;
1241                       break;
1242                     }
1243                   }
1244
1245                 }
1246                 throw e; // rethrow
1247               }
1248               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1249                 if (currentLine == null) {
1250                   currentLine = new NLSLine();
1251                   lines.add(currentLine);
1252                 }
1253                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1254               }
1255               return TokenNameStringLiteral;
1256             case '`' :
1257               try {
1258                 // consume next character
1259                 unicodeAsBackSlash = false;
1260                 currentCharacter = source[currentPosition++];
1261                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1262                 //                  && (source[currentPosition] == 'u')) {
1263                 //                  getNextUnicodeChar();
1264                 //                } else {
1265                 //                  if (withoutUnicodePtr != 0) {
1266                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1267                 //                      currentCharacter;
1268                 //                  }
1269                 //                }
1270
1271                 while (currentCharacter != '`') {
1272
1273                   /** ** in PHP \r and \n are valid in string literals *** */
1274                   //                if ((currentCharacter == '\n')
1275                   //                  || (currentCharacter == '\r')) {
1276                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1277                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1278                   //                    if (currentPosition + lookAhead == source.length)
1279                   //                      break;
1280                   //                    if (source[currentPosition + lookAhead] == '\n')
1281                   //                      break;
1282                   //                    if (source[currentPosition + lookAhead] == '\"') {
1283                   //                      currentPosition += lookAhead + 1;
1284                   //                      break;
1285                   //                    }
1286                   //                  }
1287                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1288                   //                }
1289                   if (currentCharacter == '\\') {
1290                     int escapeSize = currentPosition;
1291                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1292                     //scanEscapeCharacter has a side effect on this value and we need the previous value a few lines below
1293                     scanDoubleQuotedEscapeCharacter();
1294                     escapeSize = currentPosition - escapeSize;
1295                     if (withoutUnicodePtr == 0) {
1296                       //buffer all the entries that have been left aside....
1297                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1298                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1299                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1300                     } else { //overwrite the / in the buffer
1301                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1302                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1303                         withoutUnicodePtr--;
1304                       }
1305                     }
1306                   }
1307                   // consume next character
1308                   unicodeAsBackSlash = false;
1309                   currentCharacter = source[currentPosition++];
1310                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1311                   //                    && (source[currentPosition] == 'u')) {
1312                   //                    getNextUnicodeChar();
1313                   //                  } else {
1314                   if (withoutUnicodePtr != 0) {
1315                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1316                   }
1317                   //                  }
1318
1319                 }
1320               } catch (IndexOutOfBoundsException e) {
1321                 throw new InvalidInputException(UNTERMINATED_STRING);
1322               } catch (InvalidInputException e) {
1323                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1324                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1325                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1326                     if (currentPosition + lookAhead == source.length)
1327                       break;
1328                     if (source[currentPosition + lookAhead] == '\n')
1329                       break;
1330                     if (source[currentPosition + lookAhead] == '`') {
1331                       currentPosition += lookAhead + 1;
1332                       break;
1333                     }
1334                   }
1335
1336                 }
1337                 throw e; // rethrow
1338               }
1339               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1340                 if (currentLine == null) {
1341                   currentLine = new NLSLine();
1342                   lines.add(currentLine);
1343                 }
1344                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1345               }
1346               return TokenNameStringInterpolated;
1347             case '#' :
1348             case '/' :
1349               {
1350                 int test;
1351                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1352                   //line comment
1353                   int endPositionForLineComment = 0;
1354                   try { //get the next char
1355                     currentCharacter = source[currentPosition++];
1356                     //                    if (((currentCharacter = source[currentPosition++])
1357                     //                      == '\\')
1358                     //                      && (source[currentPosition] == 'u')) {
1359                     //                      //-------------unicode traitement ------------
1360                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1361                     //                      currentPosition++;
1362                     //                      while (source[currentPosition] == 'u') {
1363                     //                        currentPosition++;
1364                     //                      }
1365                     //                      if ((c1 =
1366                     //                        Character.getNumericValue(source[currentPosition++]))
1367                     //                        > 15
1368                     //                        || c1 < 0
1369                     //                        || (c2 =
1370                     //                          Character.getNumericValue(source[currentPosition++]))
1371                     //                          > 15
1372                     //                        || c2 < 0
1373                     //                        || (c3 =
1374                     //                          Character.getNumericValue(source[currentPosition++]))
1375                     //                          > 15
1376                     //                        || c3 < 0
1377                     //                        || (c4 =
1378                     //                          Character.getNumericValue(source[currentPosition++]))
1379                     //                          > 15
1380                     //                        || c4 < 0) {
1381                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1382                     //                      } else {
1383                     //                        currentCharacter =
1384                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1385                     //                      }
1386                     //                    }
1387
1388                     //handle the \\u case manually into comment
1389                     //                    if (currentCharacter == '\\') {
1390                     //                      if (source[currentPosition] == '\\')
1391                     //                        currentPosition++;
1392                     //                    } //jump over the \\
1393                     boolean isUnicode = false;
1394                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1395                       if (currentCharacter == '?') {
1396                         if (getNextChar('>')) {
1397                           startPosition = currentPosition - 2;
1398                           phpMode = false;
1399                           return TokenNameStopPHP;
1400                         }
1401                       }
1402
1403                       //get the next char
1404                       isUnicode = false;
1405                       currentCharacter = source[currentPosition++];
1406                       //                      if (((currentCharacter = source[currentPosition++])
1407                       //                        == '\\')
1408                       //                        && (source[currentPosition] == 'u')) {
1409                       //                        isUnicode = true;
1410                       //                        //-------------unicode traitement ------------
1411                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1412                       //                        currentPosition++;
1413                       //                        while (source[currentPosition] == 'u') {
1414                       //                          currentPosition++;
1415                       //                        }
1416                       //                        if ((c1 =
1417                       //                          Character.getNumericValue(source[currentPosition++]))
1418                       //                          > 15
1419                       //                          || c1 < 0
1420                       //                          || (c2 =
1421                       //                            Character.getNumericValue(
1422                       //                              source[currentPosition++]))
1423                       //                            > 15
1424                       //                          || c2 < 0
1425                       //                          || (c3 =
1426                       //                            Character.getNumericValue(
1427                       //                              source[currentPosition++]))
1428                       //                            > 15
1429                       //                          || c3 < 0
1430                       //                          || (c4 =
1431                       //                            Character.getNumericValue(
1432                       //                              source[currentPosition++]))
1433                       //                            > 15
1434                       //                          || c4 < 0) {
1435                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1436                       //                        } else {
1437                       //                          currentCharacter =
1438                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1439                       //                        }
1440                       //                      }
1441                       //handle the \\u case manually into comment
1442                       //                      if (currentCharacter == '\\') {
1443                       //                        if (source[currentPosition] == '\\')
1444                       //                          currentPosition++;
1445                       //                      } //jump over the \\
1446                     }
1447                     if (isUnicode) {
1448                       endPositionForLineComment = currentPosition - 6;
1449                     } else {
1450                       endPositionForLineComment = currentPosition - 1;
1451                     }
1452                     recordComment(false);
1453                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454                       checkNonExternalizeString();
1455                       if (recordLineSeparator) {
1456                         if (isUnicode) {
1457                           pushUnicodeLineSeparator();
1458                         } else {
1459                           pushLineSeparator();
1460                         }
1461                       } else {
1462                         currentLine = null;
1463                       }
1464                     }
1465                     if (tokenizeComments) {
1466                       if (!isUnicode) {
1467                         currentPosition = endPositionForLineComment;
1468                         // reset one character behind
1469                       }
1470                       return TokenNameCOMMENT_LINE;
1471                     }
1472                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1473                     if (tokenizeComments) {
1474                       currentPosition--;
1475                       // reset one character behind
1476                       return TokenNameCOMMENT_LINE;
1477                     }
1478                   }
1479                   break;
1480                 }
1481                 if (test > 0) {
1482                   //traditional and annotation comment
1483                   boolean isJavadoc = false, star = false;
1484                   // consume next character
1485                   unicodeAsBackSlash = false;
1486                   currentCharacter = source[currentPosition++];
1487                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1488                   //                    && (source[currentPosition] == 'u')) {
1489                   //                    getNextUnicodeChar();
1490                   //                  } else {
1491                   //                    if (withoutUnicodePtr != 0) {
1492                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1493                   //                        currentCharacter;
1494                   //                    }
1495                   //                  }
1496
1497                   if (currentCharacter == '*') {
1498                     isJavadoc = true;
1499                     star = true;
1500                   }
1501                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1502                     checkNonExternalizeString();
1503                     if (recordLineSeparator) {
1504                       pushLineSeparator();
1505                     } else {
1506                       currentLine = null;
1507                     }
1508                   }
1509                   try { //get the next char
1510                     currentCharacter = source[currentPosition++];
1511                     //                    if (((currentCharacter = source[currentPosition++])
1512                     //                      == '\\')
1513                     //                      && (source[currentPosition] == 'u')) {
1514                     //                      //-------------unicode traitement ------------
1515                     //                      getNextUnicodeChar();
1516                     //                    }
1517                     //handle the \\u case manually into comment
1518                     //                    if (currentCharacter == '\\') {
1519                     //                      if (source[currentPosition] == '\\')
1520                     //                        currentPosition++;
1521                     //                      //jump over the \\
1522                     //                    }
1523                     // empty comment is not a javadoc /**/
1524                     if (currentCharacter == '/') {
1525                       isJavadoc = false;
1526                     }
1527                     //loop until end of comment */
1528                     while ((currentCharacter != '/') || (!star)) {
1529                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1530                         checkNonExternalizeString();
1531                         if (recordLineSeparator) {
1532                           pushLineSeparator();
1533                         } else {
1534                           currentLine = null;
1535                         }
1536                       }
1537                       star = currentCharacter == '*';
1538                       //get next char
1539                       currentCharacter = source[currentPosition++];
1540                       //                      if (((currentCharacter = source[currentPosition++])
1541                       //                        == '\\')
1542                       //                        && (source[currentPosition] == 'u')) {
1543                       //                        //-------------unicode traitement ------------
1544                       //                        getNextUnicodeChar();
1545                       //                      }
1546                       //handle the \\u case manually into comment
1547                       //                      if (currentCharacter == '\\') {
1548                       //                        if (source[currentPosition] == '\\')
1549                       //                          currentPosition++;
1550                       //                      } //jump over the \\
1551                     }
1552                     recordComment(isJavadoc);
1553                     if (tokenizeComments) {
1554                       if (isJavadoc)
1555                         return TokenNameCOMMENT_PHPDOC;
1556                       return TokenNameCOMMENT_BLOCK;
1557                     }
1558                   } catch (IndexOutOfBoundsException e) {
1559                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1560                   }
1561                   break;
1562                 }
1563                 if (getNextChar('='))
1564                   return TokenNameDIVIDE_EQUAL;
1565                 return TokenNameDIVIDE;
1566               }
1567             case '\u001a' :
1568               if (atEnd())
1569                 return TokenNameEOF;
1570               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1571               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1572
1573             default :
1574               if (currentCharacter == '$') {
1575                 while ((currentCharacter = source[currentPosition++]) == '$') {
1576                 }
1577                 if (currentCharacter == '{')
1578                   return TokenNameDOLLAR_LBRACE;
1579                 if (isPHPIdentifierStart(currentCharacter))
1580                   return scanIdentifierOrKeyword(true);
1581                 return TokenNameERROR;
1582               }
1583               if (isPHPIdentifierStart(currentCharacter))
1584                 return scanIdentifierOrKeyword(false);
1585               if (Character.isDigit(currentCharacter))
1586                 return scanNumber(false);
1587               return TokenNameERROR;
1588           }
1589         }
1590       } //-----------------end switch while try--------------------
1591       catch (IndexOutOfBoundsException e) {
1592       }
1593     }
1594     return TokenNameEOF;
1595   }
1596
1597   //  public final void getNextUnicodeChar()
1598   //    throws IndexOutOfBoundsException, InvalidInputException {
1599   //    //VOID
1600   //    //handles the case of a unicode escape.
1601   //    //when a unicode escape appears we must use a buffer that holds the decoded char values
1602   //    //At the end of this method currentCharacter holds the newly visited char
1603   //    //and currentPosition points right after it
1604   //
1605   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1606   //
1607   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1608   //    currentPosition++;
1609   //    while (source[currentPosition] == 'u') {
1610   //      currentPosition++;
1611   //      unicodeSize++;
1612   //    }
1613   //
1614   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1615   //      || c1 < 0
1616   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1617   //      || c2 < 0
1618   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1619   //      || c3 < 0
1620   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1621   //      || c4 < 0) {
1622   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1623   //    } else {
1624   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625   //      //need the unicode buffer
1626   //      if (withoutUnicodePtr == 0) {
1627   //        //buffer all the entries that have been left aside....
1628   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1629   //        System.arraycopy(
1630   //          source,
1631   //          startPosition,
1632   //          withoutUnicodeBuffer,
1633   //          1,
1634   //          withoutUnicodePtr);
1635   //      }
1636   //      //fill the buffer with the char
1637   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1638   //    }
1639   //    unicodeAsBackSlash = currentCharacter == '\\';
1640   //  }
1641   /*
1642    * Tokenize a method body, assuming that curly brackets are properly balanced.
1643    */
1644   public final void jumpOverMethodBody() {
1645
1646     this.wasAcr = false;
1647     int found = 1;
1648     try {
1649       while (true) { //loop for jumping over comments
1650         // ---------Consume white space and handles startPosition---------
1651         boolean isWhiteSpace;
1652         do {
1653           startPosition = currentPosition;
1654           currentCharacter = source[currentPosition++];
1655           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1656           //            && (source[currentPosition] == 'u')) {
1657           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1658           //          } else {
1659           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1660             pushLineSeparator();
1661           isWhiteSpace = Character.isWhitespace(currentCharacter);
1662           //          }
1663         } while (isWhiteSpace);
1664
1665         // -------consume token until } is found---------
1666         switch (currentCharacter) {
1667           case '{' :
1668             found++;
1669             break;
1670           case '}' :
1671             found--;
1672             if (found == 0)
1673               return;
1674             break;
1675           case '\'' :
1676             {
1677               boolean test;
1678               test = getNextChar('\\');
1679               if (test) {
1680                 try {
1681                   scanDoubleQuotedEscapeCharacter();
1682                 } catch (InvalidInputException ex) {
1683                 };
1684               } else {
1685                 //                try { // consume next character
1686                 unicodeAsBackSlash = false;
1687                 currentCharacter = source[currentPosition++];
1688                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1689                 //                    && (source[currentPosition] == 'u')) {
1690                 //                    getNextUnicodeChar();
1691                 //                  } else {
1692                 if (withoutUnicodePtr != 0) {
1693                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1694                 }
1695                 //                  }
1696                 //                } catch (InvalidInputException ex) {
1697                 //                };
1698               }
1699               getNextChar('\'');
1700               break;
1701             }
1702           case '"' :
1703             try {
1704               //              try { // consume next character
1705               unicodeAsBackSlash = false;
1706               currentCharacter = source[currentPosition++];
1707               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1708               //                  && (source[currentPosition] == 'u')) {
1709               //                  getNextUnicodeChar();
1710               //                } else {
1711               if (withoutUnicodePtr != 0) {
1712                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1713               }
1714               //                }
1715               //              } catch (InvalidInputException ex) {
1716               //              };
1717               while (currentCharacter != '"') {
1718                 if (currentCharacter == '\r') {
1719                   if (source[currentPosition] == '\n')
1720                     currentPosition++;
1721                   break;
1722                   // the string cannot go further than the line
1723                 }
1724                 if (currentCharacter == '\n') {
1725                   break;
1726                   // the string cannot go further than the line
1727                 }
1728                 if (currentCharacter == '\\') {
1729                   try {
1730                     scanDoubleQuotedEscapeCharacter();
1731                   } catch (InvalidInputException ex) {
1732                   };
1733                 }
1734                 //                try { // consume next character
1735                 unicodeAsBackSlash = false;
1736                 currentCharacter = source[currentPosition++];
1737                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1738                 //                    && (source[currentPosition] == 'u')) {
1739                 //                    getNextUnicodeChar();
1740                 //                  } else {
1741                 if (withoutUnicodePtr != 0) {
1742                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1743                 }
1744                 //                  }
1745                 //                } catch (InvalidInputException ex) {
1746                 //                };
1747               }
1748             } catch (IndexOutOfBoundsException e) {
1749               return;
1750             }
1751             break;
1752           case '/' :
1753             {
1754               int test;
1755               if ((test = getNextChar('/', '*')) == 0) {
1756                 //line comment
1757                 try {
1758                   //get the next char
1759                   currentCharacter = source[currentPosition++];
1760                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1761                   //                    && (source[currentPosition] == 'u')) {
1762                   //                    //-------------unicode traitement ------------
1763                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1764                   //                    currentPosition++;
1765                   //                    while (source[currentPosition] == 'u') {
1766                   //                      currentPosition++;
1767                   //                    }
1768                   //                    if ((c1 =
1769                   //                      Character.getNumericValue(source[currentPosition++]))
1770                   //                      > 15
1771                   //                      || c1 < 0
1772                   //                      || (c2 =
1773                   //                        Character.getNumericValue(source[currentPosition++]))
1774                   //                        > 15
1775                   //                      || c2 < 0
1776                   //                      || (c3 =
1777                   //                        Character.getNumericValue(source[currentPosition++]))
1778                   //                        > 15
1779                   //                      || c3 < 0
1780                   //                      || (c4 =
1781                   //                        Character.getNumericValue(source[currentPosition++]))
1782                   //                        > 15
1783                   //                      || c4 < 0) {
1784                   //                      //error don't care of the value
1785                   //                      currentCharacter = 'A';
1786                   //                    } //something different from \n and \r
1787                   //                    else {
1788                   //                      currentCharacter =
1789                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1790                   //                    }
1791                   //                  }
1792
1793                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1794                     //get the next char
1795                     currentCharacter = source[currentPosition++];
1796                     //                    if (((currentCharacter = source[currentPosition++])
1797                     //                      == '\\')
1798                     //                      && (source[currentPosition] == 'u')) {
1799                     //                      //-------------unicode traitement ------------
1800                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1801                     //                      currentPosition++;
1802                     //                      while (source[currentPosition] == 'u') {
1803                     //                        currentPosition++;
1804                     //                      }
1805                     //                      if ((c1 =
1806                     //                        Character.getNumericValue(source[currentPosition++]))
1807                     //                        > 15
1808                     //                        || c1 < 0
1809                     //                        || (c2 =
1810                     //                          Character.getNumericValue(source[currentPosition++]))
1811                     //                          > 15
1812                     //                        || c2 < 0
1813                     //                        || (c3 =
1814                     //                          Character.getNumericValue(source[currentPosition++]))
1815                     //                          > 15
1816                     //                        || c3 < 0
1817                     //                        || (c4 =
1818                     //                          Character.getNumericValue(source[currentPosition++]))
1819                     //                          > 15
1820                     //                        || c4 < 0) {
1821                     //                        //error don't care of the value
1822                     //                        currentCharacter = 'A';
1823                     //                      } //something different from \n and \r
1824                     //                      else {
1825                     //                        currentCharacter =
1826                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1827                     //                      }
1828                     //                    }
1829                   }
1830                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1831                     pushLineSeparator();
1832                 } catch (IndexOutOfBoundsException e) {
1833                 } //an eof will them be generated
1834                 break;
1835               }
1836               if (test > 0) {
1837                 //traditional and annotation comment
1838                 boolean star = false;
1839                 //                try { // consume next character
1840                 unicodeAsBackSlash = false;
1841                 currentCharacter = source[currentPosition++];
1842                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1843                 //                    && (source[currentPosition] == 'u')) {
1844                 //                    getNextUnicodeChar();
1845                 //                  } else {
1846                 if (withoutUnicodePtr != 0) {
1847                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1848                 }
1849                 //                  };
1850                 //                } catch (InvalidInputException ex) {
1851                 //                };
1852                 if (currentCharacter == '*') {
1853                   star = true;
1854                 }
1855                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1856                   pushLineSeparator();
1857                 try { //get the next char
1858                   currentCharacter = source[currentPosition++];
1859                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1860                   //                    && (source[currentPosition] == 'u')) {
1861                   //                    //-------------unicode traitement ------------
1862                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863                   //                    currentPosition++;
1864                   //                    while (source[currentPosition] == 'u') {
1865                   //                      currentPosition++;
1866                   //                    }
1867                   //                    if ((c1 =
1868                   //                      Character.getNumericValue(source[currentPosition++]))
1869                   //                      > 15
1870                   //                      || c1 < 0
1871                   //                      || (c2 =
1872                   //                        Character.getNumericValue(source[currentPosition++]))
1873                   //                        > 15
1874                   //                      || c2 < 0
1875                   //                      || (c3 =
1876                   //                        Character.getNumericValue(source[currentPosition++]))
1877                   //                        > 15
1878                   //                      || c3 < 0
1879                   //                      || (c4 =
1880                   //                        Character.getNumericValue(source[currentPosition++]))
1881                   //                        > 15
1882                   //                      || c4 < 0) {
1883                   //                      //error don't care of the value
1884                   //                      currentCharacter = 'A';
1885                   //                    } //something different from * and /
1886                   //                    else {
1887                   //                      currentCharacter =
1888                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1889                   //                    }
1890                   //                  }
1891                   //loop until end of comment */
1892                   while ((currentCharacter != '/') || (!star)) {
1893                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1894                       pushLineSeparator();
1895                     star = currentCharacter == '*';
1896                     //get next char
1897                     currentCharacter = source[currentPosition++];
1898                     //                    if (((currentCharacter = source[currentPosition++])
1899                     //                      == '\\')
1900                     //                      && (source[currentPosition] == 'u')) {
1901                     //                      //-------------unicode traitement ------------
1902                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1903                     //                      currentPosition++;
1904                     //                      while (source[currentPosition] == 'u') {
1905                     //                        currentPosition++;
1906                     //                      }
1907                     //                      if ((c1 =
1908                     //                        Character.getNumericValue(source[currentPosition++]))
1909                     //                        > 15
1910                     //                        || c1 < 0
1911                     //                        || (c2 =
1912                     //                          Character.getNumericValue(source[currentPosition++]))
1913                     //                          > 15
1914                     //                        || c2 < 0
1915                     //                        || (c3 =
1916                     //                          Character.getNumericValue(source[currentPosition++]))
1917                     //                          > 15
1918                     //                        || c3 < 0
1919                     //                        || (c4 =
1920                     //                          Character.getNumericValue(source[currentPosition++]))
1921                     //                          > 15
1922                     //                        || c4 < 0) {
1923                     //                        //error don't care of the value
1924                     //                        currentCharacter = 'A';
1925                     //                      } //something different from * and /
1926                     //                      else {
1927                     //                        currentCharacter =
1928                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1929                     //                      }
1930                     //                    }
1931                   }
1932                 } catch (IndexOutOfBoundsException e) {
1933                   return;
1934                 }
1935                 break;
1936               }
1937               break;
1938             }
1939
1940           default :
1941             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1942               try {
1943                 scanIdentifierOrKeyword((currentCharacter == '$'));
1944               } catch (InvalidInputException ex) {
1945               };
1946               break;
1947             }
1948             if (Character.isDigit(currentCharacter)) {
1949               try {
1950                 scanNumber(false);
1951               } catch (InvalidInputException ex) {
1952               };
1953               break;
1954             }
1955         }
1956       }
1957       //-----------------end switch while try--------------------
1958     } catch (IndexOutOfBoundsException e) {
1959     } catch (InvalidInputException e) {
1960     }
1961     return;
1962   }
1963   //  public final boolean jumpOverUnicodeWhiteSpace()
1964   //    throws InvalidInputException {
1965   //    //BOOLEAN
1966   //    //handle the case of unicode. Jump over the next whiteSpace
1967   //    //making startPosition pointing on the next available char
1968   //    //On false, the currentCharacter is filled up with a potential
1969   //    //correct char
1970   //
1971   //    try {
1972   //      this.wasAcr = false;
1973   //      int c1, c2, c3, c4;
1974   //      int unicodeSize = 6;
1975   //      currentPosition++;
1976   //      while (source[currentPosition] == 'u') {
1977   //        currentPosition++;
1978   //        unicodeSize++;
1979   //      }
1980   //
1981   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1982   //        || c1 < 0)
1983   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1984   //          || c2 < 0)
1985   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1986   //          || c3 < 0)
1987   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1988   //          || c4 < 0)) {
1989   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1990   //      }
1991   //
1992   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1993   //      if (recordLineSeparator
1994   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995   //        pushLineSeparator();
1996   //      if (Character.isWhitespace(currentCharacter))
1997   //        return true;
1998   //
1999   //      //buffer the new char which is not a white space
2000   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2001   //      //withoutUnicodePtr == 1 is true here
2002   //      return false;
2003   //    } catch (IndexOutOfBoundsException e) {
2004   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2005   //    }
2006   //  }
2007   public final int[] getLineEnds() {
2008     //return a bounded copy of this.lineEnds
2009
2010     int[] copy;
2011     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2012     return copy;
2013   }
2014
2015   public char[] getSource() {
2016     return this.source;
2017   }
2018   final char[] optimizedCurrentTokenSource1() {
2019     //return always the same char[] build only once
2020
2021     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2022     char charOne = source[startPosition];
2023     switch (charOne) {
2024       case 'a' :
2025         return charArray_a;
2026       case 'b' :
2027         return charArray_b;
2028       case 'c' :
2029         return charArray_c;
2030       case 'd' :
2031         return charArray_d;
2032       case 'e' :
2033         return charArray_e;
2034       case 'f' :
2035         return charArray_f;
2036       case 'g' :
2037         return charArray_g;
2038       case 'h' :
2039         return charArray_h;
2040       case 'i' :
2041         return charArray_i;
2042       case 'j' :
2043         return charArray_j;
2044       case 'k' :
2045         return charArray_k;
2046       case 'l' :
2047         return charArray_l;
2048       case 'm' :
2049         return charArray_m;
2050       case 'n' :
2051         return charArray_n;
2052       case 'o' :
2053         return charArray_o;
2054       case 'p' :
2055         return charArray_p;
2056       case 'q' :
2057         return charArray_q;
2058       case 'r' :
2059         return charArray_r;
2060       case 's' :
2061         return charArray_s;
2062       case 't' :
2063         return charArray_t;
2064       case 'u' :
2065         return charArray_u;
2066       case 'v' :
2067         return charArray_v;
2068       case 'w' :
2069         return charArray_w;
2070       case 'x' :
2071         return charArray_x;
2072       case 'y' :
2073         return charArray_y;
2074       case 'z' :
2075         return charArray_z;
2076       default :
2077         return new char[] { charOne };
2078     }
2079   }
2080
2081   final char[] optimizedCurrentTokenSource2() {
2082     //try to return the same char[] build only once
2083
2084     char c0, c1;
2085     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2086     char[][] table = charArray_length[0][hash];
2087     int i = newEntry2;
2088     while (++i < InternalTableSize) {
2089       char[] charArray = table[i];
2090       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2091         return charArray;
2092     }
2093     //---------other side---------
2094     i = -1;
2095     int max = newEntry2;
2096     while (++i <= max) {
2097       char[] charArray = table[i];
2098       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2099         return charArray;
2100     }
2101     //--------add the entry-------
2102     if (++max >= InternalTableSize)
2103       max = 0;
2104     char[] r;
2105     table[max] = (r = new char[] { c0, c1 });
2106     newEntry2 = max;
2107     return r;
2108   }
2109
2110   final char[] optimizedCurrentTokenSource3() {
2111     //try to return the same char[] build only once
2112
2113     char c0, c1, c2;
2114     int hash =
2115       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2116         % TableSize;
2117     char[][] table = charArray_length[1][hash];
2118     int i = newEntry3;
2119     while (++i < InternalTableSize) {
2120       char[] charArray = table[i];
2121       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2122         return charArray;
2123     }
2124     //---------other side---------
2125     i = -1;
2126     int max = newEntry3;
2127     while (++i <= max) {
2128       char[] charArray = table[i];
2129       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2130         return charArray;
2131     }
2132     //--------add the entry-------
2133     if (++max >= InternalTableSize)
2134       max = 0;
2135     char[] r;
2136     table[max] = (r = new char[] { c0, c1, c2 });
2137     newEntry3 = max;
2138     return r;
2139   }
2140
2141   final char[] optimizedCurrentTokenSource4() {
2142     //try to return the same char[] build only once
2143
2144     char c0, c1, c2, c3;
2145     long hash =
2146       ((((long) (c0 = source[startPosition])) << 18)
2147         + ((c1 = source[startPosition + 1]) << 12)
2148         + ((c2 = source[startPosition + 2]) << 6)
2149         + (c3 = source[startPosition + 3]))
2150         % TableSize;
2151     char[][] table = charArray_length[2][(int) hash];
2152     int i = newEntry4;
2153     while (++i < InternalTableSize) {
2154       char[] charArray = table[i];
2155       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2156         return charArray;
2157     }
2158     //---------other side---------
2159     i = -1;
2160     int max = newEntry4;
2161     while (++i <= max) {
2162       char[] charArray = table[i];
2163       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2164         return charArray;
2165     }
2166     //--------add the entry-------
2167     if (++max >= InternalTableSize)
2168       max = 0;
2169     char[] r;
2170     table[max] = (r = new char[] { c0, c1, c2, c3 });
2171     newEntry4 = max;
2172     return r;
2173
2174   }
2175
2176   final char[] optimizedCurrentTokenSource5() {
2177     //try to return the same char[] build only once
2178
2179     char c0, c1, c2, c3, c4;
2180     long hash =
2181       ((((long) (c0 = source[startPosition])) << 24)
2182         + (((long) (c1 = source[startPosition + 1])) << 18)
2183         + ((c2 = source[startPosition + 2]) << 12)
2184         + ((c3 = source[startPosition + 3]) << 6)
2185         + (c4 = source[startPosition + 4]))
2186         % TableSize;
2187     char[][] table = charArray_length[3][(int) hash];
2188     int i = newEntry5;
2189     while (++i < InternalTableSize) {
2190       char[] charArray = table[i];
2191       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2192         return charArray;
2193     }
2194     //---------other side---------
2195     i = -1;
2196     int max = newEntry5;
2197     while (++i <= max) {
2198       char[] charArray = table[i];
2199       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2200         return charArray;
2201     }
2202     //--------add the entry-------
2203     if (++max >= InternalTableSize)
2204       max = 0;
2205     char[] r;
2206     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2207     newEntry5 = max;
2208     return r;
2209
2210   }
2211
2212   final char[] optimizedCurrentTokenSource6() {
2213     //try to return the same char[] build only once
2214
2215     char c0, c1, c2, c3, c4, c5;
2216     long hash =
2217       ((((long) (c0 = source[startPosition])) << 32)
2218         + (((long) (c1 = source[startPosition + 1])) << 24)
2219         + (((long) (c2 = source[startPosition + 2])) << 18)
2220         + ((c3 = source[startPosition + 3]) << 12)
2221         + ((c4 = source[startPosition + 4]) << 6)
2222         + (c5 = source[startPosition + 5]))
2223         % TableSize;
2224     char[][] table = charArray_length[4][(int) hash];
2225     int i = newEntry6;
2226     while (++i < InternalTableSize) {
2227       char[] charArray = table[i];
2228       if ((c0 == charArray[0])
2229         && (c1 == charArray[1])
2230         && (c2 == charArray[2])
2231         && (c3 == charArray[3])
2232         && (c4 == charArray[4])
2233         && (c5 == charArray[5]))
2234         return charArray;
2235     }
2236     //---------other side---------
2237     i = -1;
2238     int max = newEntry6;
2239     while (++i <= max) {
2240       char[] charArray = table[i];
2241       if ((c0 == charArray[0])
2242         && (c1 == charArray[1])
2243         && (c2 == charArray[2])
2244         && (c3 == charArray[3])
2245         && (c4 == charArray[4])
2246         && (c5 == charArray[5]))
2247         return charArray;
2248     }
2249     //--------add the entry-------
2250     if (++max >= InternalTableSize)
2251       max = 0;
2252     char[] r;
2253     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2254     newEntry6 = max;
2255     return r;
2256   }
2257
2258   public final void pushLineSeparator() throws InvalidInputException {
2259     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2260     final int INCREMENT = 250;
2261
2262     if (this.checkNonExternalizedStringLiterals) {
2263       // reinitialize the current line for non externalize strings purpose
2264       currentLine = null;
2265     }
2266     //currentCharacter is at position currentPosition-1
2267
2268     // cr 000D
2269     if (currentCharacter == '\r') {
2270       int separatorPos = currentPosition - 1;
2271       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2272         return;
2273       //System.out.println("CR-" + separatorPos);
2274       try {
2275         lineEnds[++linePtr] = separatorPos;
2276       } catch (IndexOutOfBoundsException e) {
2277         //linePtr value is correct
2278         int oldLength = lineEnds.length;
2279         int[] old = lineEnds;
2280         lineEnds = new int[oldLength + INCREMENT];
2281         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282         lineEnds[linePtr] = separatorPos;
2283       }
2284       // look-ahead for merged cr+lf
2285       try {
2286         if (source[currentPosition] == '\n') {
2287           //System.out.println("look-ahead LF-" + currentPosition);
2288           lineEnds[linePtr] = currentPosition;
2289           currentPosition++;
2290           wasAcr = false;
2291         } else {
2292           wasAcr = true;
2293         }
2294       } catch (IndexOutOfBoundsException e) {
2295         wasAcr = true;
2296       }
2297     } else {
2298       // lf 000A
2299       if (currentCharacter == '\n') {
2300         //must merge eventual cr followed by lf
2301         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2302           //System.out.println("merge LF-" + (currentPosition - 1));
2303           lineEnds[linePtr] = currentPosition - 1;
2304         } else {
2305           int separatorPos = currentPosition - 1;
2306           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2307             return;
2308           // System.out.println("LF-" + separatorPos);
2309           try {
2310             lineEnds[++linePtr] = separatorPos;
2311           } catch (IndexOutOfBoundsException e) {
2312             //linePtr value is correct
2313             int oldLength = lineEnds.length;
2314             int[] old = lineEnds;
2315             lineEnds = new int[oldLength + INCREMENT];
2316             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2317             lineEnds[linePtr] = separatorPos;
2318           }
2319         }
2320         wasAcr = false;
2321       }
2322     }
2323   }
2324   public final void pushUnicodeLineSeparator() {
2325     // isUnicode means that the \r or \n has been read as a unicode character
2326
2327     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2328
2329     final int INCREMENT = 250;
2330     //currentCharacter is at position currentPosition-1
2331
2332     if (this.checkNonExternalizedStringLiterals) {
2333       // reinitialize the current line for non externalize strings purpose
2334       currentLine = null;
2335     }
2336
2337     // cr 000D
2338     if (currentCharacter == '\r') {
2339       int separatorPos = currentPosition - 6;
2340       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2341         return;
2342       //System.out.println("CR-" + separatorPos);
2343       try {
2344         lineEnds[++linePtr] = separatorPos;
2345       } catch (IndexOutOfBoundsException e) {
2346         //linePtr value is correct
2347         int oldLength = lineEnds.length;
2348         int[] old = lineEnds;
2349         lineEnds = new int[oldLength + INCREMENT];
2350         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2351         lineEnds[linePtr] = separatorPos;
2352       }
2353       // look-ahead for merged cr+lf
2354       if (source[currentPosition] == '\n') {
2355         //System.out.println("look-ahead LF-" + currentPosition);
2356         lineEnds[linePtr] = currentPosition;
2357         currentPosition++;
2358         wasAcr = false;
2359       } else {
2360         wasAcr = true;
2361       }
2362     } else {
2363       // lf 000A
2364       if (currentCharacter == '\n') {
2365         //must merge eventual cr followed by lf
2366         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2367           //System.out.println("merge LF-" + (currentPosition - 1));
2368           lineEnds[linePtr] = currentPosition - 6;
2369         } else {
2370           int separatorPos = currentPosition - 6;
2371           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2372             return;
2373           // System.out.println("LF-" + separatorPos);
2374           try {
2375             lineEnds[++linePtr] = separatorPos;
2376           } catch (IndexOutOfBoundsException e) {
2377             //linePtr value is correct
2378             int oldLength = lineEnds.length;
2379             int[] old = lineEnds;
2380             lineEnds = new int[oldLength + INCREMENT];
2381             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2382             lineEnds[linePtr] = separatorPos;
2383           }
2384         }
2385         wasAcr = false;
2386       }
2387     }
2388   }
2389   public final void recordComment(boolean isJavadoc) {
2390
2391     // a new annotation comment is recorded
2392     try {
2393       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2394     } catch (IndexOutOfBoundsException e) {
2395       int oldStackLength = commentStops.length;
2396       int[] oldStack = commentStops;
2397       commentStops = new int[oldStackLength + 30];
2398       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2399       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400       //grows the positions buffers too
2401       int[] old = commentStarts;
2402       commentStarts = new int[oldStackLength + 30];
2403       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2404     }
2405
2406     //the buffer is of a correct size here
2407     commentStarts[commentPtr] = startPosition;
2408   }
2409   public void resetTo(int begin, int end) {
2410     //reset the scanner to a given position where it may rescan again
2411
2412     diet = false;
2413     initialPosition = startPosition = currentPosition = begin;
2414     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2415     commentPtr = -1; // reset comment stack
2416   }
2417
2418   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2419     // the string with "\\u" is a legal string of two chars \ and u
2420     //thus we use a direct access to the source (for regular cases).
2421
2422     //    if (unicodeAsBackSlash) {
2423     //      // consume next character
2424     //      unicodeAsBackSlash = false;
2425     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2426     //        && (source[currentPosition] == 'u')) {
2427     //        getNextUnicodeChar();
2428     //      } else {
2429     //        if (withoutUnicodePtr != 0) {
2430     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2431     //        }
2432     //      }
2433     //    } else
2434     currentCharacter = source[currentPosition++];
2435     switch (currentCharacter) {
2436       case '\'' :
2437         currentCharacter = '\'';
2438         break;
2439       case '\\' :
2440         currentCharacter = '\\';
2441         break;
2442       default :
2443         currentCharacter = '\\';
2444         currentPosition--;
2445     }
2446   }
2447
2448   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2449     // the string with "\\u" is a legal string of two chars \ and u
2450     //thus we use a direct access to the source (for regular cases).
2451
2452     //    if (unicodeAsBackSlash) {
2453     //      // consume next character
2454     //      unicodeAsBackSlash = false;
2455     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2456     //        && (source[currentPosition] == 'u')) {
2457     //        getNextUnicodeChar();
2458     //      } else {
2459     //        if (withoutUnicodePtr != 0) {
2460     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2461     //        }
2462     //      }
2463     //    } else
2464     currentCharacter = source[currentPosition++];
2465     switch (currentCharacter) {
2466       //      case 'b' :
2467       //        currentCharacter = '\b';
2468       //        break;
2469       case 't' :
2470         currentCharacter = '\t';
2471         break;
2472       case 'n' :
2473         currentCharacter = '\n';
2474         break;
2475         //      case 'f' :
2476         //        currentCharacter = '\f';
2477         //        break;
2478       case 'r' :
2479         currentCharacter = '\r';
2480         break;
2481       case '\"' :
2482         currentCharacter = '\"';
2483         break;
2484       case '\'' :
2485         currentCharacter = '\'';
2486         break;
2487       case '\\' :
2488         currentCharacter = '\\';
2489         break;
2490       case '$' :
2491         currentCharacter = '$';
2492         break;
2493       default :
2494         // -----------octal escape--------------
2495         // OctalDigit
2496         // OctalDigit OctalDigit
2497         // ZeroToThree OctalDigit OctalDigit
2498
2499         int number = Character.getNumericValue(currentCharacter);
2500         if (number >= 0 && number <= 7) {
2501           boolean zeroToThreeNot = number > 3;
2502           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503             int digit = Character.getNumericValue(currentCharacter);
2504             if (digit >= 0 && digit <= 7) {
2505               number = (number * 8) + digit;
2506               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2507                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2508                   currentPosition--;
2509                 } else {
2510                   digit = Character.getNumericValue(currentCharacter);
2511                   if (digit >= 0 && digit <= 7) {
2512                     // has read \ZeroToThree OctalDigit OctalDigit
2513                     number = (number * 8) + digit;
2514                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2515                     currentPosition--;
2516                   }
2517                 }
2518               } else { // has read \OctalDigit NonDigit--> ignore last character
2519                 currentPosition--;
2520               }
2521             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2522               currentPosition--;
2523             }
2524           } else { // has read \OctalDigit --> ignore last character
2525             currentPosition--;
2526           }
2527           if (number > 255)
2528             throw new InvalidInputException(INVALID_ESCAPE);
2529           currentCharacter = (char) number;
2530         }
2531         //else
2532         //     throw new InvalidInputException(INVALID_ESCAPE);
2533     }
2534   }
2535
2536   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2537   //    return scanIdentifierOrKeyword( false );
2538   //  }
2539
2540   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2541     //test keywords
2542
2543     //first dispatch on the first char.
2544     //then the length. If there are several
2545     //keywors with the same length AND the same first char, then do another
2546     //disptach on the second char :-)...cool....but fast !
2547
2548     useAssertAsAnIndentifier = false;
2549
2550     while (getNextCharAsJavaIdentifierPart()) {
2551     };
2552
2553     if (isVariable) {
2554       if (new String(getCurrentTokenSource()).equals("$this")) {
2555         return TokenNamethis;
2556       }
2557       return TokenNameVariable;
2558     }
2559     int index, length;
2560     char[] data;
2561     char firstLetter;
2562     //    if (withoutUnicodePtr == 0)
2563
2564     //quick test on length == 1 but not on length > 12 while most identifier
2565     //have a length which is <= 12...but there are lots of identifier with
2566     //only one char....
2567
2568     //      {
2569     if ((length = currentPosition - startPosition) == 1)
2570       return TokenNameIdentifier;
2571     //  data = source;
2572     data = new char[length];
2573     index = startPosition;
2574     for (int i = 0; i < length; i++) {
2575       data[i] = Character.toLowerCase(source[index + i]);
2576     }
2577     index = 0;
2578     //    } else {
2579     //      if ((length = withoutUnicodePtr) == 1)
2580     //        return TokenNameIdentifier;
2581     //      // data = withoutUnicodeBuffer;
2582     //      data = new char[withoutUnicodeBuffer.length];
2583     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2584     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2585     //      }
2586     //      index = 1;
2587     //    }
2588
2589     firstLetter = data[index];
2590     switch (firstLetter) {
2591
2592       case 'a' : // as and array abstract
2593         switch (length) {
2594           case 2 : //as
2595             if ((data[++index] == 's')) {
2596               return TokenNameas;
2597             } else {
2598               return TokenNameIdentifier;
2599             }
2600           case 3 : //and
2601             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2602               return TokenNameAND;
2603             } else {
2604               return TokenNameIdentifier;
2605             }
2606           case 5 : // array
2607             if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2608               return TokenNamearray;
2609             else
2610               return TokenNameIdentifier;
2611           case 8 :
2612             if ((data[++index] == 'b')
2613               && (data[++index] == 's')
2614               && (data[++index] == 't')
2615               && (data[++index] == 'r')
2616               && (data[++index] == 'a')
2617               && (data[++index] == 'c')
2618               && (data[++index] == 't'))
2619               return TokenNameabstract;
2620             else
2621               return TokenNameIdentifier;
2622           default :
2623             return TokenNameIdentifier;
2624         }
2625       case 'b' : //break
2626         switch (length) {
2627           case 5 :
2628             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2629               return TokenNamebreak;
2630             else
2631               return TokenNameIdentifier;
2632           default :
2633             return TokenNameIdentifier;
2634         }
2635
2636       case 'c' : //case catch class const continue
2637         switch (length) {
2638           case 4 :
2639             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2640               return TokenNamecase;
2641             else
2642               return TokenNameIdentifier;
2643           case 5 :
2644             if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
2645               return TokenNamecatch;
2646             if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2647               return TokenNameclass;
2648             if ((data[index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
2649               return TokenNameconst;
2650             else
2651               return TokenNameIdentifier;
2652           case 8 :
2653             if ((data[++index] == 'o')
2654               && (data[++index] == 'n')
2655               && (data[++index] == 't')
2656               && (data[++index] == 'i')
2657               && (data[++index] == 'n')
2658               && (data[++index] == 'u')
2659               && (data[++index] == 'e'))
2660               return TokenNamecontinue;
2661             else
2662               return TokenNameIdentifier;
2663           default :
2664             return TokenNameIdentifier;
2665         }
2666
2667       case 'd' : //define declare default do die
2668         switch (length) {
2669           case 2 :
2670             if ((data[++index] == 'o'))
2671               return TokenNamedo;
2672             else
2673               return TokenNameIdentifier;
2674           case 3 :
2675             if ((data[++index] == 'i') && (data[++index] == 'e'))
2676               return TokenNamedie;
2677             else
2678               return TokenNameIdentifier;
2679           case 6 :
2680             if ((data[++index] == 'e')
2681               && (data[++index] == 'f')
2682               && (data[++index] == 'i')
2683               && (data[++index] == 'n')
2684               && (data[++index] == 'e'))
2685               return TokenNamedefine;
2686             else
2687               return TokenNameIdentifier;
2688           case 7 :
2689             if ((data[++index] == 'e')
2690               && (data[++index] == 'c')
2691               && (data[++index] == 'l')
2692               && (data[++index] == 'a')
2693               && (data[++index] == 'r')
2694               && (data[++index] == 'e'))
2695               return TokenNamedeclare;
2696             index = 0;
2697             if ((data[++index] == 'e')
2698               && (data[++index] == 'f')
2699               && (data[++index] == 'a')
2700               && (data[++index] == 'u')
2701               && (data[++index] == 'l')
2702               && (data[++index] == 't'))
2703               return TokenNamedefault;
2704             else
2705               return TokenNameIdentifier;
2706           default :
2707             return TokenNameIdentifier;
2708         }
2709       case 'e' : //echo else exit elseif extends eval
2710         switch (length) {
2711           case 4 :
2712             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2713               return TokenNameecho;
2714             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2715               return TokenNameelse;
2716             else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
2717               return TokenNameexit;
2718             else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
2719               return TokenNameeval;
2720             else
2721               return TokenNameIdentifier;
2722           case 5 : // endif empty
2723             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2724               return TokenNameendif;
2725             if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
2726               return TokenNameempty;
2727             else
2728               return TokenNameIdentifier;
2729           case 6 : // endfor
2730             if ((data[++index] == 'n')
2731               && (data[++index] == 'd')
2732               && (data[++index] == 'f')
2733               && (data[++index] == 'o')
2734               && (data[++index] == 'r'))
2735               return TokenNameendfor;
2736             else if (
2737               (data[index] == 'l')
2738                 && (data[++index] == 's')
2739                 && (data[++index] == 'e')
2740                 && (data[++index] == 'i')
2741                 && (data[++index] == 'f'))
2742               return TokenNameelseif;
2743             else
2744               return TokenNameIdentifier;
2745           case 7 :
2746             if ((data[++index] == 'x')
2747               && (data[++index] == 't')
2748               && (data[++index] == 'e')
2749               && (data[++index] == 'n')
2750               && (data[++index] == 'd')
2751               && (data[++index] == 's'))
2752               return TokenNameextends;
2753             else
2754               return TokenNameIdentifier;
2755           case 8 : // endwhile
2756             if ((data[++index] == 'n')
2757               && (data[++index] == 'd')
2758               && (data[++index] == 'w')
2759               && (data[++index] == 'h')
2760               && (data[++index] == 'i')
2761               && (data[++index] == 'l')
2762               && (data[++index] == 'e'))
2763               return TokenNameendwhile;
2764             else
2765               return TokenNameIdentifier;
2766           case 9 : // endswitch
2767             if ((data[++index] == 'n')
2768               && (data[++index] == 'd')
2769               && (data[++index] == 's')
2770               && (data[++index] == 'w')
2771               && (data[++index] == 'i')
2772               && (data[++index] == 't')
2773               && (data[++index] == 'c')
2774               && (data[++index] == 'h'))
2775               return TokenNameendswitch;
2776             else
2777               return TokenNameIdentifier;
2778           case 10 : // enddeclare
2779             if ((data[++index] == 'n')
2780               && (data[++index] == 'd')
2781               && (data[++index] == 'd')
2782               && (data[++index] == 'e')
2783               && (data[++index] == 'c')
2784               && (data[++index] == 'l')
2785               && (data[++index] == 'a')
2786               && (data[++index] == 'r')
2787               && (data[++index] == 'e'))
2788               return TokenNameendforeach;
2789             index = 0;
2790             if ((data[++index] == 'n') // endforeach
2791               && (data[++index] == 'd')
2792               && (data[++index] == 'f')
2793               && (data[++index] == 'o')
2794               && (data[++index] == 'r')
2795               && (data[++index] == 'e')
2796               && (data[++index] == 'a')
2797               && (data[++index] == 'c')
2798               && (data[++index] == 'h'))
2799               return TokenNameendforeach;
2800             else
2801               return TokenNameIdentifier;
2802
2803           default :
2804             return TokenNameIdentifier;
2805         }
2806
2807       case 'f' : //for false final function
2808         switch (length) {
2809           case 3 :
2810             if ((data[++index] == 'o') && (data[++index] == 'r'))
2811               return TokenNamefor;
2812             else
2813               return TokenNameIdentifier;
2814           case 5 :
2815             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2816               return TokenNamefalse;
2817             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
2818               return TokenNamefinal;
2819             else
2820               return TokenNameIdentifier;
2821           case 7 : // foreach
2822             if ((data[++index] == 'o')
2823               && (data[++index] == 'r')
2824               && (data[++index] == 'e')
2825               && (data[++index] == 'a')
2826               && (data[++index] == 'c')
2827               && (data[++index] == 'h'))
2828               return TokenNameforeach;
2829             else
2830               return TokenNameIdentifier;
2831           case 8 : // function
2832             if ((data[++index] == 'u')
2833               && (data[++index] == 'n')
2834               && (data[++index] == 'c')
2835               && (data[++index] == 't')
2836               && (data[++index] == 'i')
2837               && (data[++index] == 'o')
2838               && (data[++index] == 'n'))
2839               return TokenNamefunction;
2840             else
2841               return TokenNameIdentifier;
2842           default :
2843             return TokenNameIdentifier;
2844         }
2845       case 'g' : //global
2846         if (length == 6) {
2847           if ((data[++index] == 'l')
2848             && (data[++index] == 'o')
2849             && (data[++index] == 'b')
2850             && (data[++index] == 'a')
2851             && (data[++index] == 'l')) {
2852             return TokenNameglobal;
2853           }
2854         }
2855         return TokenNameIdentifier;
2856
2857       case 'i' : //if int isset include include_once instanceof interface implements
2858         switch (length) {
2859           case 2 :
2860             if (data[++index] == 'f')
2861               return TokenNameif;
2862             else
2863               return TokenNameIdentifier;
2864             //          case 3 :
2865             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2866             //              return TokenNameint;
2867             //            else
2868             //              return TokenNameIdentifier;
2869           case 5 :
2870             if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
2871               return TokenNameisset;
2872             else
2873               return TokenNameIdentifier;
2874           case 7 :
2875             if ((data[++index] == 'n')
2876               && (data[++index] == 'c')
2877               && (data[++index] == 'l')
2878               && (data[++index] == 'u')
2879               && (data[++index] == 'd')
2880               && (data[++index] == 'e'))
2881               return TokenNameinclude;
2882             else
2883               return TokenNameIdentifier;
2884           case 9 : // interface
2885             if ((data[++index] == 'n')
2886               && (data[++index] == 't')
2887               && (data[++index] == 'e')
2888               && (data[++index] == 'r')
2889               && (data[++index] == 'f')
2890               && (data[++index] == 'a')
2891               && (data[++index] == 'c')
2892               && (data[++index] == 'e'))
2893               return TokenNameinterface;
2894             else
2895               return TokenNameIdentifier;
2896           case 10 : // instanceof
2897             if ((data[++index] == 'n')
2898               && (data[++index] == 's')
2899               && (data[++index] == 't')
2900               && (data[++index] == 'a')
2901               && (data[++index] == 'n')
2902               && (data[++index] == 'c')
2903               && (data[++index] == 'e')
2904               && (data[++index] == 'o')
2905               && (data[++index] == 'f'))
2906               return TokenNameinstanceof;
2907             if ((data[index] == 'm')
2908               && (data[++index] == 'p')
2909               && (data[++index] == 'l')
2910               && (data[++index] == 'e')
2911               && (data[++index] == 'm')
2912               && (data[++index] == 'e')
2913               && (data[++index] == 'n')
2914               && (data[++index] == 't')
2915               && (data[++index] == 's'))
2916               return TokenNameimplements;
2917             else
2918               return TokenNameIdentifier;
2919           case 12 :
2920             if ((data[++index] == 'n')
2921               && (data[++index] == 'c')
2922               && (data[++index] == 'l')
2923               && (data[++index] == 'u')
2924               && (data[++index] == 'd')
2925               && (data[++index] == 'e')
2926               && (data[++index] == '_')
2927               && (data[++index] == 'o')
2928               && (data[++index] == 'n')
2929               && (data[++index] == 'c')
2930               && (data[++index] == 'e'))
2931               return TokenNameinclude_once;
2932             else
2933               return TokenNameIdentifier;
2934           default :
2935             return TokenNameIdentifier;
2936         }
2937
2938       case 'l' : //list
2939         if (length == 4) {
2940           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2941             return TokenNamelist;
2942           }
2943         }
2944         return TokenNameIdentifier;
2945
2946       case 'n' : // new null
2947         switch (length) {
2948           case 3 :
2949             if ((data[++index] == 'e') && (data[++index] == 'w'))
2950               return TokenNamenew;
2951             else
2952               return TokenNameIdentifier;
2953           case 4 :
2954             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2955               return TokenNamenull;
2956             else
2957               return TokenNameIdentifier;
2958
2959           default :
2960             return TokenNameIdentifier;
2961         }
2962       case 'o' : // or old_function
2963         if (length == 2) {
2964           if (data[++index] == 'r') {
2965             return TokenNameOR;
2966           }
2967         }
2968         //        if (length == 12) {
2969         //          if ((data[++index] == 'l')
2970         //            && (data[++index] == 'd')
2971         //            && (data[++index] == '_')
2972         //            && (data[++index] == 'f')
2973         //            && (data[++index] == 'u')
2974         //            && (data[++index] == 'n')
2975         //            && (data[++index] == 'c')
2976         //            && (data[++index] == 't')
2977         //            && (data[++index] == 'i')
2978         //            && (data[++index] == 'o')
2979         //            && (data[++index] == 'n')) {
2980         //            return TokenNameold_function;
2981         //          }
2982         //        }
2983         return TokenNameIdentifier;
2984
2985       case 'p' : // print public private protected
2986         switch (length) {
2987           case 5 :
2988             if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2989               return TokenNameprint;
2990             } else
2991               return TokenNameIdentifier;
2992           case 6 :
2993             if ((data[++index] == 'u')
2994               && (data[++index] == 'b')
2995               && (data[++index] == 'l')
2996               && (data[++index] == 'i')
2997               && (data[++index] == 'c')) {
2998               return TokenNamepublic;
2999             } else
3000               return TokenNameIdentifier;
3001           case 7 :
3002             if ((data[++index] == 'r')
3003               && (data[++index] == 'i')
3004               && (data[++index] == 'v')
3005               && (data[++index] == 'a')
3006               && (data[++index] == 't')
3007               && (data[++index] == 'e')) {
3008               return TokenNameprivate;
3009             } else
3010               return TokenNameIdentifier;
3011           case 9 :
3012             if ((data[++index] == 'r')
3013               && (data[++index] == 'o')
3014               && (data[++index] == 't')
3015               && (data[++index] == 'e')
3016               && (data[++index] == 'c')
3017               && (data[++index] == 't')
3018               && (data[++index] == 'e')
3019               && (data[++index] == 'd')) {
3020               return TokenNameprotected;
3021             } else
3022               return TokenNameIdentifier;
3023         }
3024         return TokenNameIdentifier;
3025       case 'r' : //return require require_once
3026         if (length == 6) {
3027           if ((data[++index] == 'e')
3028             && (data[++index] == 't')
3029             && (data[++index] == 'u')
3030             && (data[++index] == 'r')
3031             && (data[++index] == 'n')) {
3032             return TokenNamereturn;
3033           }
3034         } else if (length == 7) {
3035           if ((data[++index] == 'e')
3036             && (data[++index] == 'q')
3037             && (data[++index] == 'u')
3038             && (data[++index] == 'i')
3039             && (data[++index] == 'r')
3040             && (data[++index] == 'e')) {
3041             return TokenNamerequire;
3042           }
3043         } else if (length == 12) {
3044           if ((data[++index] == 'e')
3045             && (data[++index] == 'q')
3046             && (data[++index] == 'u')
3047             && (data[++index] == 'i')
3048             && (data[++index] == 'r')
3049             && (data[++index] == 'e')
3050             && (data[++index] == '_')
3051             && (data[++index] == 'o')
3052             && (data[++index] == 'n')
3053             && (data[++index] == 'c')
3054             && (data[++index] == 'e')) {
3055             return TokenNamerequire_once;
3056           }
3057         } else
3058           return TokenNameIdentifier;
3059
3060       case 's' : //static switch
3061         switch (length) {
3062           case 6 :
3063             if (data[++index] == 't')
3064               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3065                 return TokenNamestatic;
3066               } else
3067                 return TokenNameIdentifier;
3068             else if (
3069               (data[index] == 'w')
3070                 && (data[++index] == 'i')
3071                 && (data[++index] == 't')
3072                 && (data[++index] == 'c')
3073                 && (data[++index] == 'h'))
3074               return TokenNameswitch;
3075             else
3076               return TokenNameIdentifier;
3077           default :
3078             return TokenNameIdentifier;
3079         }
3080
3081       case 't' : // try true throw
3082         switch (length) {
3083           case 3 :
3084             if ((data[++index] == 'r') && (data[++index] == 'y'))
3085               return TokenNametry;
3086             else
3087               return TokenNameIdentifier;
3088           case 4 :
3089             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
3090               return TokenNametrue;
3091             else
3092               return TokenNameIdentifier;
3093           case 5 :
3094             if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3095               return TokenNamethrow;
3096             else
3097               return TokenNameIdentifier;
3098
3099           default :
3100             return TokenNameIdentifier;
3101         }
3102       case 'u' : //use unset
3103         switch (length) {
3104           case 3 :
3105             if ((data[++index] == 's') && (data[++index] == 'e'))
3106               return TokenNameuse;
3107             else
3108               return TokenNameIdentifier;
3109           case 5 :
3110             if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3111               return TokenNameunset;
3112             else
3113               return TokenNameIdentifier;
3114           default :
3115             return TokenNameIdentifier;
3116         }
3117       case 'v' : //var
3118         switch (length) {
3119           case 3 :
3120             if ((data[++index] == 'a') && (data[++index] == 'r'))
3121               return TokenNamevar;
3122             else
3123               return TokenNameIdentifier;
3124
3125           default :
3126             return TokenNameIdentifier;
3127         }
3128
3129       case 'w' : //while
3130         switch (length) {
3131           case 5 :
3132             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3133               return TokenNamewhile;
3134             else
3135               return TokenNameIdentifier;
3136             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&&
3137             // (data[++index]=='p'))
3138             //return TokenNamewidefp ;
3139             //else
3140             //return TokenNameIdentifier;
3141           default :
3142             return TokenNameIdentifier;
3143         }
3144
3145       case 'x' : //xor
3146         switch (length) {
3147           case 3 :
3148             if ((data[++index] == 'o') && (data[++index] == 'r'))
3149               return TokenNameXOR;
3150             else
3151               return TokenNameIdentifier;
3152
3153           default :
3154             return TokenNameIdentifier;
3155         }
3156       default :
3157         return TokenNameIdentifier;
3158     }
3159   }
3160   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3161
3162     //when entering this method the currentCharacter is the firt
3163     //digit of the number , i.e. it may be preceeded by a . when
3164     //dotPrefix is true
3165
3166     boolean floating = dotPrefix;
3167     if ((!dotPrefix) && (currentCharacter == '0')) {
3168       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3169         //force the first char of the hexa number do exist...
3170         // consume next character
3171         unicodeAsBackSlash = false;
3172         currentCharacter = source[currentPosition++];
3173         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3174         //          && (source[currentPosition] == 'u')) {
3175         //          getNextUnicodeChar();
3176         //        } else {
3177         //          if (withoutUnicodePtr != 0) {
3178         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3179         //          }
3180         //        }
3181         if (Character.digit(currentCharacter, 16) == -1)
3182           throw new InvalidInputException(INVALID_HEXA);
3183         //---end forcing--
3184         while (getNextCharAsDigit(16)) {
3185         };
3186         //        if (getNextChar('l', 'L') >= 0)
3187         //          return TokenNameLongLiteral;
3188         //        else
3189         return TokenNameIntegerLiteral;
3190       }
3191
3192       //there is x or X in the number
3193       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3194       if (getNextCharAsDigit()) {
3195         //-------------potential octal-----------------
3196         while (getNextCharAsDigit()) {
3197         };
3198
3199         //        if (getNextChar('l', 'L') >= 0) {
3200         //          return TokenNameLongLiteral;
3201         //        }
3202         //
3203         //        if (getNextChar('f', 'F') >= 0) {
3204         //          return TokenNameFloatingPointLiteral;
3205         //        }
3206
3207         if (getNextChar('d', 'D') >= 0) {
3208           return TokenNameDoubleLiteral;
3209         } else { //make the distinction between octal and float ....
3210           if (getNextChar('.')) { //bingo ! ....
3211             while (getNextCharAsDigit()) {
3212             };
3213             if (getNextChar('e', 'E') >= 0) {
3214               // consume next character
3215               unicodeAsBackSlash = false;
3216               currentCharacter = source[currentPosition++];
3217               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3218               //                && (source[currentPosition] == 'u')) {
3219               //                getNextUnicodeChar();
3220               //              } else {
3221               //                if (withoutUnicodePtr != 0) {
3222               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3223               //                }
3224               //              }
3225
3226               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3227                 // consume next character
3228                 unicodeAsBackSlash = false;
3229                 currentCharacter = source[currentPosition++];
3230                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3231                 //                  && (source[currentPosition] == 'u')) {
3232                 //                  getNextUnicodeChar();
3233                 //                } else {
3234                 //                  if (withoutUnicodePtr != 0) {
3235                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3236                 //                      currentCharacter;
3237                 //                  }
3238                 //                }
3239               }
3240               if (!Character.isDigit(currentCharacter))
3241                 throw new InvalidInputException(INVALID_FLOAT);
3242               while (getNextCharAsDigit()) {
3243               };
3244             }
3245             //            if (getNextChar('f', 'F') >= 0)
3246             //              return TokenNameFloatingPointLiteral;
3247             getNextChar('d', 'D'); //jump over potential d or D
3248             return TokenNameDoubleLiteral;
3249           } else {
3250             return TokenNameIntegerLiteral;
3251           }
3252         }
3253       } else {
3254         /* carry on */
3255       }
3256     }
3257
3258     while (getNextCharAsDigit()) {
3259     };
3260
3261     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3262     //      return TokenNameLongLiteral;
3263
3264     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3265       while (getNextCharAsDigit()) {
3266       };
3267       floating = true;
3268     }
3269
3270     //if floating is true both exponant and suffix may be optional
3271
3272     if (getNextChar('e', 'E') >= 0) {
3273       floating = true;
3274       // consume next character
3275       unicodeAsBackSlash = false;
3276       currentCharacter = source[currentPosition++];
3277       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3278       //        && (source[currentPosition] == 'u')) {
3279       //        getNextUnicodeChar();
3280       //      } else {
3281       //        if (withoutUnicodePtr != 0) {
3282       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3283       //        }
3284       //      }
3285
3286       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3287         unicodeAsBackSlash = false;
3288         currentCharacter = source[currentPosition++];
3289         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3290         //          && (source[currentPosition] == 'u')) {
3291         //          getNextUnicodeChar();
3292         //        } else {
3293         //          if (withoutUnicodePtr != 0) {
3294         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3295         //          }
3296         //        }
3297       }
3298       if (!Character.isDigit(currentCharacter))
3299         throw new InvalidInputException(INVALID_FLOAT);
3300       while (getNextCharAsDigit()) {
3301       };
3302     }
3303
3304     if (getNextChar('d', 'D') >= 0)
3305       return TokenNameDoubleLiteral;
3306     //    if (getNextChar('f', 'F') >= 0)
3307     //      return TokenNameFloatingPointLiteral;
3308
3309     //the long flag has been tested before
3310
3311     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3312   }
3313   /**
3314    * Search the line number corresponding to a specific position
3315    *
3316    */
3317   public final int getLineNumber(int position) {
3318
3319     if (lineEnds == null)
3320       return 1;
3321     int length = linePtr + 1;
3322     if (length == 0)
3323       return 1;
3324     int g = 0, d = length - 1;
3325     int m = 0;
3326     while (g <= d) {
3327       m = (g + d) / 2;
3328       if (position < lineEnds[m]) {
3329         d = m - 1;
3330       } else if (position > lineEnds[m]) {
3331         g = m + 1;
3332       } else {
3333         return m + 1;
3334       }
3335     }
3336     if (position < lineEnds[m]) {
3337       return m + 1;
3338     }
3339     return m + 2;
3340   }
3341
3342   public void setPHPMode(boolean mode) {
3343     phpMode = mode;
3344   }
3345
3346   public final void setSource(char[] source) {
3347     //the source-buffer is set to sourceString
3348
3349     if (source == null) {
3350       this.source = new char[0];
3351     } else {
3352       this.source = source;
3353     }
3354     startPosition = -1;
3355     initialPosition = currentPosition = 0;
3356     containsAssertKeyword = false;
3357     withoutUnicodeBuffer = new char[this.source.length];
3358
3359   }
3360
3361   public String toString() {
3362     if (startPosition == source.length)
3363       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3364     if (currentPosition > source.length)
3365       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3366
3367     char front[] = new char[startPosition];
3368     System.arraycopy(source, 0, front, 0, startPosition);
3369
3370     int middleLength = (currentPosition - 1) - startPosition + 1;
3371     char middle[];
3372     if (middleLength > -1) {
3373       middle = new char[middleLength];
3374       System.arraycopy(source, startPosition, middle, 0, middleLength);
3375     } else {
3376       middle = new char[0];
3377     }
3378
3379     char end[] = new char[source.length - (currentPosition - 1)];
3380     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3381
3382     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3383     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3384     + new String(end);
3385   }
3386   public final String toStringAction(int act) {
3387
3388     switch (act) {
3389       case TokenNameERROR :
3390         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3391       case TokenNameStopPHP :
3392         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3393       case TokenNameIdentifier :
3394         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3395       case TokenNameVariable :
3396         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3397       case TokenNameas :
3398         return "as"; //$NON-NLS-1$
3399       case TokenNamebreak :
3400         return "break"; //$NON-NLS-1$
3401       case TokenNamecase :
3402         return "case"; //$NON-NLS-1$
3403       case TokenNameclass :
3404         return "class"; //$NON-NLS-1$
3405       case TokenNamecontinue :
3406         return "continue"; //$NON-NLS-1$
3407       case TokenNamedefault :
3408         return "default"; //$NON-NLS-1$
3409       case TokenNamedefine :
3410         return "define"; //$NON-NLS-1$
3411       case TokenNamedo :
3412         return "do"; //$NON-NLS-1$
3413       case TokenNameecho :
3414         return "echo"; //$NON-NLS-1$
3415       case TokenNameelse :
3416         return "else"; //$NON-NLS-1$
3417       case TokenNameelseif :
3418         return "elseif"; //$NON-NLS-1$
3419       case TokenNameendfor :
3420         return "endfor"; //$NON-NLS-1$
3421       case TokenNameendforeach :
3422         return "endforeach"; //$NON-NLS-1$
3423       case TokenNameendif :
3424         return "endif"; //$NON-NLS-1$
3425       case TokenNameendswitch :
3426         return "endswitch"; //$NON-NLS-1$
3427       case TokenNameendwhile :
3428         return "endwhile"; //$NON-NLS-1$
3429       case TokenNameextends :
3430         return "extends"; //$NON-NLS-1$
3431       case TokenNamefalse :
3432         return "false"; //$NON-NLS-1$
3433       case TokenNamefor :
3434         return "for"; //$NON-NLS-1$
3435       case TokenNameforeach :
3436         return "foreach"; //$NON-NLS-1$
3437       case TokenNamefunction :
3438         return "function"; //$NON-NLS-1$
3439       case TokenNameglobal :
3440         return "global"; //$NON-NLS-1$
3441       case TokenNameif :
3442         return "if"; //$NON-NLS-1$
3443       case TokenNameinclude :
3444         return "include"; //$NON-NLS-1$
3445       case TokenNameinclude_once :
3446         return "include_once"; //$NON-NLS-1$
3447       case TokenNamelist :
3448         return "list"; //$NON-NLS-1$
3449       case TokenNamenew :
3450         return "new"; //$NON-NLS-1$
3451       case TokenNamenull :
3452         return "null"; //$NON-NLS-1$
3453       case TokenNameprint :
3454         return "print"; //$NON-NLS-1$
3455       case TokenNamerequire :
3456         return "require"; //$NON-NLS-1$
3457       case TokenNamerequire_once :
3458         return "require_once"; //$NON-NLS-1$
3459       case TokenNamereturn :
3460         return "return"; //$NON-NLS-1$
3461       case TokenNamestatic :
3462         return "static"; //$NON-NLS-1$
3463       case TokenNameswitch :
3464         return "switch"; //$NON-NLS-1$
3465       case TokenNametrue :
3466         return "true"; //$NON-NLS-1$
3467       case TokenNamevar :
3468         return "var"; //$NON-NLS-1$
3469       case TokenNamewhile :
3470         return "while"; //$NON-NLS-1$
3471       case TokenNamethis :
3472         return "$this"; //$NON-NLS-1$
3473       case TokenNameIntegerLiteral :
3474         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475       case TokenNameDoubleLiteral :
3476         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3477       case TokenNameStringLiteral :
3478         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479       case TokenNameStringConstant :
3480         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3481       case TokenNameStringInterpolated :
3482         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3483       case TokenNameHEREDOC :
3484         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3485
3486       case TokenNamePLUS_PLUS :
3487         return "++"; //$NON-NLS-1$
3488       case TokenNameMINUS_MINUS :
3489         return "--"; //$NON-NLS-1$
3490       case TokenNameEQUAL_EQUAL :
3491         return "=="; //$NON-NLS-1$
3492       case TokenNameEQUAL_EQUAL_EQUAL :
3493         return "==="; //$NON-NLS-1$
3494       case TokenNameEQUAL_GREATER :
3495         return "=>"; //$NON-NLS-1$
3496       case TokenNameLESS_EQUAL :
3497         return "<="; //$NON-NLS-1$
3498       case TokenNameGREATER_EQUAL :
3499         return ">="; //$NON-NLS-1$
3500       case TokenNameNOT_EQUAL :
3501         return "!="; //$NON-NLS-1$
3502       case TokenNameNOT_EQUAL_EQUAL :
3503         return "!=="; //$NON-NLS-1$
3504       case TokenNameLEFT_SHIFT :
3505         return "<<"; //$NON-NLS-1$
3506       case TokenNameRIGHT_SHIFT :
3507         return ">>"; //$NON-NLS-1$
3508       case TokenNamePLUS_EQUAL :
3509         return "+="; //$NON-NLS-1$
3510       case TokenNameMINUS_EQUAL :
3511         return "-="; //$NON-NLS-1$
3512       case TokenNameMULTIPLY_EQUAL :
3513         return "*="; //$NON-NLS-1$
3514       case TokenNameDIVIDE_EQUAL :
3515         return "/="; //$NON-NLS-1$
3516       case TokenNameAND_EQUAL :
3517         return "&="; //$NON-NLS-1$
3518       case TokenNameOR_EQUAL :
3519         return "|="; //$NON-NLS-1$
3520       case TokenNameXOR_EQUAL :
3521         return "^="; //$NON-NLS-1$
3522       case TokenNameREMAINDER_EQUAL :
3523         return "%="; //$NON-NLS-1$
3524       case TokenNameLEFT_SHIFT_EQUAL :
3525         return "<<="; //$NON-NLS-1$
3526       case TokenNameRIGHT_SHIFT_EQUAL :
3527         return ">>="; //$NON-NLS-1$
3528       case TokenNameOR_OR :
3529         return "||"; //$NON-NLS-1$
3530       case TokenNameAND_AND :
3531         return "&&"; //$NON-NLS-1$
3532       case TokenNamePLUS :
3533         return "+"; //$NON-NLS-1$
3534       case TokenNameMINUS :
3535         return "-"; //$NON-NLS-1$
3536       case TokenNameMINUS_GREATER :
3537         return "->";
3538       case TokenNameNOT :
3539         return "!"; //$NON-NLS-1$
3540       case TokenNameREMAINDER :
3541         return "%"; //$NON-NLS-1$
3542       case TokenNameXOR :
3543         return "^"; //$NON-NLS-1$
3544       case TokenNameAND :
3545         return "&"; //$NON-NLS-1$
3546       case TokenNameMULTIPLY :
3547         return "*"; //$NON-NLS-1$
3548       case TokenNameOR :
3549         return "|"; //$NON-NLS-1$
3550       case TokenNameTWIDDLE :
3551         return "~"; //$NON-NLS-1$
3552       case TokenNameTWIDDLE_EQUAL :
3553         return "~="; //$NON-NLS-1$
3554       case TokenNameDIVIDE :
3555         return "/"; //$NON-NLS-1$
3556       case TokenNameGREATER :
3557         return ">"; //$NON-NLS-1$
3558       case TokenNameLESS :
3559         return "<"; //$NON-NLS-1$
3560       case TokenNameLPAREN :
3561         return "("; //$NON-NLS-1$
3562       case TokenNameRPAREN :
3563         return ")"; //$NON-NLS-1$
3564       case TokenNameLBRACE :
3565         return "{"; //$NON-NLS-1$
3566       case TokenNameRBRACE :
3567         return "}"; //$NON-NLS-1$
3568       case TokenNameLBRACKET :
3569         return "["; //$NON-NLS-1$
3570       case TokenNameRBRACKET :
3571         return "]"; //$NON-NLS-1$
3572       case TokenNameSEMICOLON :
3573         return ";"; //$NON-NLS-1$
3574       case TokenNameQUESTION :
3575         return "?"; //$NON-NLS-1$
3576       case TokenNameCOLON :
3577         return ":"; //$NON-NLS-1$
3578       case TokenNameCOMMA :
3579         return ","; //$NON-NLS-1$
3580       case TokenNameDOT :
3581         return "."; //$NON-NLS-1$
3582       case TokenNameEQUAL :
3583         return "="; //$NON-NLS-1$
3584       case TokenNameAT :
3585         return "@";
3586       case TokenNameDOLLAR_LBRACE :
3587         return "${";
3588       case TokenNameEOF :
3589         return "EOF"; //$NON-NLS-1$
3590       case TokenNameWHITESPACE :
3591         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3592       case TokenNameCOMMENT_LINE :
3593         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3594       case TokenNameCOMMENT_BLOCK :
3595         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3596       case TokenNameCOMMENT_PHPDOC :
3597         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3598       case TokenNameHTML :
3599         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3600       default :
3601         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3602     }
3603   }
3604
3605   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3606     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3607   }
3608
3609   public Scanner(
3610     boolean tokenizeComments,
3611     boolean tokenizeWhiteSpace,
3612     boolean checkNonExternalizedStringLiterals,
3613     boolean assertMode) {
3614     this.eofPosition = Integer.MAX_VALUE;
3615     this.tokenizeComments = tokenizeComments;
3616     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3617     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3618     this.assertMode = assertMode;
3619   }
3620
3621   private void checkNonExternalizeString() throws InvalidInputException {
3622     if (currentLine == null)
3623       return;
3624     parseTags(currentLine);
3625   }
3626
3627   private void parseTags(NLSLine line) throws InvalidInputException {
3628     String s = new String(getCurrentTokenSource());
3629     int pos = s.indexOf(TAG_PREFIX);
3630     int lineLength = line.size();
3631     while (pos != -1) {
3632       int start = pos + TAG_PREFIX_LENGTH;
3633       int end = s.indexOf(TAG_POSTFIX, start);
3634       String index = s.substring(start, end);
3635       int i = 0;
3636       try {
3637         i = Integer.parseInt(index) - 1;
3638         // Tags are one based not zero based.
3639       } catch (NumberFormatException e) {
3640         i = -1; // we don't want to consider this as a valid NLS tag
3641       }
3642       if (line.exists(i)) {
3643         line.set(i, null);
3644       }
3645       pos = s.indexOf(TAG_PREFIX, start);
3646     }
3647
3648     this.nonNLSStrings = new StringLiteral[lineLength];
3649     int nonNLSCounter = 0;
3650     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3651       StringLiteral literal = (StringLiteral) iterator.next();
3652       if (literal != null) {
3653         this.nonNLSStrings[nonNLSCounter++] = literal;
3654       }
3655     }
3656     if (nonNLSCounter == 0) {
3657       this.nonNLSStrings = null;
3658       currentLine = null;
3659       return;
3660     }
3661     this.wasNonExternalizedStringLiteral = true;
3662     if (nonNLSCounter != lineLength) {
3663       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3664     }
3665     currentLine = null;
3666   }
3667
3668   public final void scanEscapeCharacter() throws InvalidInputException {
3669     // the string with "\\u" is a legal string of two chars \ and u
3670     //thus we use a direct access to the source (for regular cases).
3671
3672     if (unicodeAsBackSlash) {
3673       // consume next character
3674       unicodeAsBackSlash = false;
3675       //                        if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3676       //                                getNextUnicodeChar();
3677       //                        } else {
3678       if (withoutUnicodePtr != 0) {
3679         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3680         //                              }
3681       }
3682     } else
3683       currentCharacter = source[currentPosition++];
3684     switch (currentCharacter) {
3685       case 'b' :
3686         currentCharacter = '\b';
3687         break;
3688       case 't' :
3689         currentCharacter = '\t';
3690         break;
3691       case 'n' :
3692         currentCharacter = '\n';
3693         break;
3694       case 'f' :
3695         currentCharacter = '\f';
3696         break;
3697       case 'r' :
3698         currentCharacter = '\r';
3699         break;
3700       case '\"' :
3701         currentCharacter = '\"';
3702         break;
3703       case '\'' :
3704         currentCharacter = '\'';
3705         break;
3706       case '\\' :
3707         currentCharacter = '\\';
3708         break;
3709       default :
3710         // -----------octal escape--------------
3711         // OctalDigit
3712         // OctalDigit OctalDigit
3713         // ZeroToThree OctalDigit OctalDigit
3714
3715         int number = Character.getNumericValue(currentCharacter);
3716         if (number >= 0 && number <= 7) {
3717           boolean zeroToThreeNot = number > 3;
3718           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3719             int digit = Character.getNumericValue(currentCharacter);
3720             if (digit >= 0 && digit <= 7) {
3721               number = (number * 8) + digit;
3722               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3723                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
3724                   currentPosition--;
3725                 } else {
3726                   digit = Character.getNumericValue(currentCharacter);
3727                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
3728                     number = (number * 8) + digit;
3729                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3730                     currentPosition--;
3731                   }
3732                 }
3733               } else { // has read \OctalDigit NonDigit--> ignore last character
3734                 currentPosition--;
3735               }
3736             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3737               currentPosition--;
3738             }
3739           } else { // has read \OctalDigit --> ignore last character
3740             currentPosition--;
3741           }
3742           if (number > 255)
3743             throw new InvalidInputException(INVALID_ESCAPE);
3744           currentCharacter = (char) number;
3745         } else
3746           throw new InvalidInputException(INVALID_ESCAPE);
3747     }
3748   }
3749
3750   // chech presence of task: tags
3751   public void checkTaskTag(int commentStart, int commentEnd) {
3752
3753     // only look for newer task: tags
3754     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3755       return;
3756     }
3757     int foundTaskIndex = this.foundTaskCount;
3758     nextChar : for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3759
3760       char[] tag = null;
3761       char[] priority = null;
3762
3763       // check for tag occurrence
3764       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3765         tag = this.taskTags[itag];
3766         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
3767         int tagLength = tag.length;
3768         for (int t = 0; t < tagLength; t++) {
3769           if (this.source[i + t] != tag[t])
3770             continue nextTag;
3771         }
3772
3773         if (this.foundTaskTags == null) {
3774           this.foundTaskTags = new char[5][];
3775           this.foundTaskMessages = new char[5][];
3776           this.foundTaskPriorities = new char[5][];
3777           this.foundTaskPositions = new int[5][];
3778         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3779           System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
3780           System.arraycopy(
3781             this.foundTaskMessages,
3782             0,
3783             this.foundTaskMessages = new char[this.foundTaskCount * 2][],
3784             0,
3785             this.foundTaskCount);
3786           System.arraycopy(
3787             this.foundTaskPriorities,
3788             0,
3789             this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3790             0,
3791             this.foundTaskCount);
3792           System.arraycopy(
3793             this.foundTaskPositions,
3794             0,
3795             this.foundTaskPositions = new int[this.foundTaskCount * 2][],
3796             0,
3797             this.foundTaskCount);
3798         }
3799         this.foundTaskTags[this.foundTaskCount] = tag;
3800         this.foundTaskPriorities[this.foundTaskCount] = priority;
3801         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
3802         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3803         this.foundTaskCount++;
3804
3805         i += tagLength - 1; // will be incremented when looping
3806       }
3807     }
3808
3809     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3810       // retrieve message start and end positions
3811       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3812       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
3813       // at most beginning of next task
3814       if (max_value < msgStart)
3815         max_value = msgStart; // would only occur if tag is before EOF.
3816       int end = -1;
3817       char c;
3818
3819       for (int j = msgStart; j < max_value; j++) {
3820         if ((c = this.source[j]) == '\n' || c == '\r') {
3821           end = j - 1;
3822           break;
3823         }
3824       }
3825
3826       if (end == -1) {
3827         for (int j = max_value; j > msgStart; j--) {
3828           if ((c = this.source[j]) == '*') {
3829             end = j - 1;
3830             break;
3831           }
3832         }
3833         if (end == -1)
3834           end = max_value;
3835       }
3836
3837       if (msgStart == end)
3838         continue; // empty
3839
3840       // trim the message
3841       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3842         end--;
3843       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3844         msgStart++;
3845
3846       // update the end position of the task
3847       this.foundTaskPositions[i][1] = end;
3848
3849       // get the message source
3850       final int messageLength = end - msgStart + 1;
3851       char[] message = new char[messageLength];
3852
3853       System.arraycopy(source, msgStart, message, 0, messageLength);
3854       this.foundTaskMessages[i] = message;
3855     }
3856   }
3857
3858 }