net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  20
  21 public class Scanner implements IScanner, ITerminalSymbols {
  22   /*
  23    * The APIs are: - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner) -
  24    * getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into proper chars) -
  25    * startPosition gives the token's start position in the stream - currentPosition-1 gives the token's end position in the stream
  26    */
  27   // 1.4 feature
  28   private boolean assertMode;
  29
  30   public boolean useAssertAsAnIndentifier = false;
  31
  32   //flag indicating if processed source contains occurrences of keyword assert
  33   public boolean containsAssertKeyword = false;
  34
  35   public boolean recordLineSeparator;
  36
  37   public boolean ignorePHPOneLiner = false;
  38
  39   public boolean phpMode = false;
  40
  41   public Stack encapsedStringStack = null;
  42
  43   public char currentCharacter;
  44
  45   public int startPosition;
  46
  47   public int currentPosition;
  48
  49   public int initialPosition, eofPosition;
  50
  51   // (describes eofPosition, declared above) after this position EOF tokens are
  52   // generated instead of real tokens from the source
  53   public boolean tokenizeComments;
  54
  55   public boolean tokenizeWhiteSpace;
  56
  57   public boolean tokenizeStrings;
  58
  59   //source should be viewed as a window (aka a part)
  60   //of a entire very large stream
  61   public char source[];
  62
  63   //unicode support
  64   public char[] withoutUnicodeBuffer;
  65
  66   public int withoutUnicodePtr;
  67
  68   // (describes withoutUnicodePtr, declared above) when == 0 ==> no unicode in the current token
  69   public boolean unicodeAsBackSlash = false;
  70
  71   public boolean scanningFloatLiteral = false;
  72
  73   //support for /** comments
  74   public int[] commentStops = new int[10];
  75
  76   public int[] commentStarts = new int[10];
  77
  78   public int commentPtr = -1; // no comment test with commentPtr value -1
  79
  80   protected int lastCommentLinePosition = -1;
  81
  82   //diet parsing support - jump over some method body when requested
  83   public boolean diet = false;
  84
  85   //support for the poor-line-debuggers ....
  86   //remember the position of the cr/lf
  87   public int[] lineEnds = new int[250];
  88
  89   public int linePtr = -1;
  90
  91   public boolean wasAcr = false;
  92
  93   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  94
  95   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  96
  97   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  98
  99   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 100
 101   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 102
 103   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 104
 105   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 106
 107   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 108
 109   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 110
 111   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 112
 113   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 114
 115   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 116
 117   //----------------optimized identifier management------------------
 118   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 119       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 120       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 121       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 122       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 123       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 124       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 125       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 126       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 127
 128   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 129
 130   static final int TableSize = 30, InternalTableSize = 6;
 131
 132   //30*6 = 180 entries
 133   public static final int OptimizedLength = 6;
 134
 135   public/* static */
 136   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 137
 138   // support for detecting non-externalized string literals
 139   int currentLineNr = -1;
 140
 141   int previousLineNr = -1;
 142
 143   NLSLine currentLine = null;
 144
 145   List lines = new ArrayList();
 146
 147   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 148
 149   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 150
 151   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 152
 153   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 154
 155   public StringLiteral[] nonNLSStrings = null;
 156
 157   public boolean checkNonExternalizedStringLiterals = true;
 158
 159   public boolean wasNonExternalizedStringLiteral = false;
 160   /* static */{
 161     for (int i = 0; i < 6; i++) {
 162       for (int j = 0; j < TableSize; j++) {
 163         for (int k = 0; k < InternalTableSize; k++) {
 164           charArray_length[i][j][k] = initCharArray;
 165         }
 166       }
 167     }
 168   }
 169
 170   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 171
 172   public static final int RoundBracket = 0;
 173
 174   public static final int SquareBracket = 1;
 175
 176   public static final int CurlyBracket = 2;
 177
 178   public static final int BracketKinds = 3;
 179
 180   // task tag support
 181   public char[][] foundTaskTags = null;
 182
 183   public char[][] foundTaskMessages;
 184
 185   public char[][] foundTaskPriorities = null;
 186
 187   public int[][] foundTaskPositions;
 188
 189   public int foundTaskCount = 0;
 190
 191   public char[][] taskTags = null;
 192
 193   public char[][] taskPriorities = null;
 194
 195   public static final boolean DEBUG = false;
 196
 197   public static final boolean TRACE = false;
 198
 199   /**
 200    * Determines if the specified character is permissible as the first character in a PHP identifier
 201    */
 202   public static boolean isPHPIdentifierStart(char ch) {
 203     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 204   }
 205
 206   /**
 207    * Determines if the specified character may be part of a PHP identifier as other than the first character
 208    */
 209   public static boolean isPHPIdentifierPart(char ch) {
 210     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 211   }
 212
 213   public final boolean atEnd() {
 214     // This code is not relevant if source is
 215     // Only a part of the real stream input
 216     return source.length == currentPosition;
 217   }
 218
 219   public char[] getCurrentIdentifierSource() {
 220     //return the token REAL source (aka unicodes are precomputed)
 221     char[] result;
 222     //    if (withoutUnicodePtr != 0)
 223     //      //0 is used as a fast test flag so the real first char is in position 1
 224     //      System.arraycopy(
 225     //        withoutUnicodeBuffer,
 226     //        1,
 227     //        result = new char[withoutUnicodePtr],
 228     //        0,
 229     //        withoutUnicodePtr);
 230     //    else {
 231     int length = currentPosition - startPosition;
 232     switch (length) { // see OptimizedLength
 233     case 1:
 234       return optimizedCurrentTokenSource1();
 235     case 2:
 236       return optimizedCurrentTokenSource2();
 237     case 3:
 238       return optimizedCurrentTokenSource3();
 239     case 4:
 240       return optimizedCurrentTokenSource4();
 241     case 5:
 242       return optimizedCurrentTokenSource5();
 243     case 6:
 244       return optimizedCurrentTokenSource6();
 245     }
 246     //no optimization
 247     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 248     //   }
 249     return result;
 250   }
 251
 252   public int getCurrentTokenEndPosition() {
 253     return this.currentPosition - 1;
 254   }
 255
 256   public final char[] getCurrentTokenSource() {
 257     // Return the token REAL source (aka unicodes are precomputed)
 258     char[] result;
 259     //    if (withoutUnicodePtr != 0)
 260     //      // 0 is used as a fast test flag so the real first char is in position 1
 261     //      System.arraycopy(
 262     //        withoutUnicodeBuffer,
 263     //        1,
 264     //        result = new char[withoutUnicodePtr],
 265     //        0,
 266     //        withoutUnicodePtr);
 267     //    else {
 268     int length;
 269     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 270     //    }
 271     return result;
 272   }
 273
 274   public final char[] getCurrentTokenSource(int startPos) {
 275     // Return the token REAL source (aka unicodes are precomputed)
 276     char[] result;
 277     //    if (withoutUnicodePtr != 0)
 278     //      // 0 is used as a fast test flag so the real first char is in position 1
 279     //      System.arraycopy(
 280     //        withoutUnicodeBuffer,
 281     //        1,
 282     //        result = new char[withoutUnicodePtr],
 283     //        0,
 284     //        withoutUnicodePtr);
 285     //    else {
 286     int length;
 287     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 288     //  }
 289     return result;
 290   }
 291
 292   public final char[] getCurrentTokenSourceString() {
 293     //return the token REAL source (aka unicodes are precomputed).
 294     //REMOVE the two " that are at the beginning and the end.
 295     char[] result;
 296     if (withoutUnicodePtr != 0)
 297       //0 is used as a fast test flag so the real first char is in position 1
 298       System.arraycopy(withoutUnicodeBuffer, 2,
 299       //2 is 1 (real start) + 1 (to jump over the ")
 300           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 301     else {
 302       int length;
 303       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 304     }
 305     return result;
 306   }
 307
 308   public int getCurrentTokenStartPosition() {
 309     return this.startPosition;
 310   }
 311
 312   public final char[] getCurrentStringLiteralSource() {
 313     // Return the token REAL source (aka unicodes are precomputed)
 314     char[] result;
 315     int length;
 316     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 317     //    }
 318     return result;
 319   }
 320
 321   /*
 322    * Search the source position corresponding to the end of a given line number
 323    *
 324    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 325    *
 326    * In case the given line number is inconsistent, answers -1.
 327    */
 328   public final int getLineEnd(int lineNumber) {
 329     if (lineEnds == null)
 330       return -1;
 331     if (lineNumber >= lineEnds.length)
 332       return -1;
 333     if (lineNumber <= 0)
 334       return -1;
 335     if (lineNumber == lineEnds.length - 1)
 336       return eofPosition;
 337     return lineEnds[lineNumber - 1];
 338     // next line start one character behind the lineEnd of the previous line
 339   }
 340
 341   /**
 342    * Search the source position corresponding to the beginning of a given line number
 343    *
 344    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 345    *
 346    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 347    *
 348    * In case the given line number is inconsistent, answers -1.
 349    */
 350   public final int getLineStart(int lineNumber) {
 351     if (lineEnds == null)
 352       return -1;
 353     if (lineNumber >= lineEnds.length)
 354       return -1;
 355     if (lineNumber <= 0)
 356       return -1;
 357     if (lineNumber == 1)
 358       return initialPosition;
 359     return lineEnds[lineNumber - 2] + 1;
 360     // next line start one character behind the lineEnd of the previous line
 361   }
 362
 363   public final boolean getNextChar(char testedChar) {
 364     //BOOLEAN
 365     //handle the case of unicode.
 366     //when a unicode appears then we must use a buffer that holds char
 367     // internal values
 368     //At the end of this method currentCharacter holds the new visited char
 369     //and currentPosition points right next after it
 370     //Both previous lines are true if the currentCharacter is == to the
 371     // testedChar
 372     //On false, no side effect has occured.
 373     //ALL getNextChar.... ARE OPTIMIZED COPIES
 374     int temp = currentPosition;
 375     try {
 376       currentCharacter = source[currentPosition++];
 377       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 378       //        && (source[currentPosition] == 'u')) {
 379       //        //-------------unicode traitement ------------
 380       //        int c1, c2, c3, c4;
 381       //        int unicodeSize = 6;
 382       //        currentPosition++;
 383       //        while (source[currentPosition] == 'u') {
 384       //          currentPosition++;
 385       //          unicodeSize++;
 386       //        }
 387       //
 388       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 389       //          || c1 < 0)
 390       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 391       //            || c2 < 0)
 392       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 393       //            || c3 < 0)
 394       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 395       //            || c4 < 0)) {
 396       //          currentPosition = temp;
 397       //          return false;
 398       //        }
 399       //
 400       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 401       //        if (currentCharacter != testedChar) {
 402       //          currentPosition = temp;
 403       //          return false;
 404       //        }
 405       //        unicodeAsBackSlash = currentCharacter == '\\';
 406       //
 407       //        //need the unicode buffer
 408       //        if (withoutUnicodePtr == 0) {
 409       //          //buffer all the entries that have been left aside....
 410       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 411       //          System.arraycopy(
 412       //            source,
 413       //            startPosition,
 414       //            withoutUnicodeBuffer,
 415       //            1,
 416       //            withoutUnicodePtr);
 417       //        }
 418       //        //fill the buffer with the char
 419       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 420       //        return true;
 421       //
 422       //      } //-------------end unicode traitement--------------
 423       //      else {
 424       if (currentCharacter != testedChar) {
 425         currentPosition = temp;
 426         return false;
 427       }
 428       unicodeAsBackSlash = false;
 429       //        if (withoutUnicodePtr != 0)
 430       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 431       return true;
 432       //      }
 433     } catch (IndexOutOfBoundsException e) {
 434       unicodeAsBackSlash = false;
 435       currentPosition = temp;
 436       return false;
 437     }
 438   }
 439
 440   public final int getNextChar(char testedChar1, char testedChar2) {
 441     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 442     //test can be done with (x==0) for the first and (x>0) for the second
 443     //handle the case of unicode.
 444     //when a unicode appears then we must use a buffer that holds char
 445     // internal values
 446     //At the end of this method currentCharacter holds the new visited char
 447     //and currentPosition points right next after it
 448     //Both previous lines are true if the currentCharacter is == to the
 449     // testedChar1/2
 450     //On false, no side effect has occured.
 451     //ALL getNextChar.... ARE OPTIMIZED COPIES
 452     int temp = currentPosition;
 453     try {
 454       int result;
 455       currentCharacter = source[currentPosition++];
 456       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 457       //        && (source[currentPosition] == 'u')) {
 458       //        //-------------unicode traitement ------------
 459       //        int c1, c2, c3, c4;
 460       //        int unicodeSize = 6;
 461       //        currentPosition++;
 462       //        while (source[currentPosition] == 'u') {
 463       //          currentPosition++;
 464       //          unicodeSize++;
 465       //        }
 466       //
 467       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 468       //          || c1 < 0)
 469       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 470       //            || c2 < 0)
 471       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 472       //            || c3 < 0)
 473       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 474       //            || c4 < 0)) {
 475       //          currentPosition = temp;
 476       //          return 2;
 477       //        }
 478       //
 479       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 480       //        if (currentCharacter == testedChar1)
 481       //          result = 0;
 482       //        else if (currentCharacter == testedChar2)
 483       //          result = 1;
 484       //        else {
 485       //          currentPosition = temp;
 486       //          return -1;
 487       //        }
 488       //
 489       //        //need the unicode buffer
 490       //        if (withoutUnicodePtr == 0) {
 491       //          //buffer all the entries that have been left aside....
 492       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 493       //          System.arraycopy(
 494       //            source,
 495       //            startPosition,
 496       //            withoutUnicodeBuffer,
 497       //            1,
 498       //            withoutUnicodePtr);
 499       //        }
 500       //        //fill the buffer with the char
 501       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 502       //        return result;
 503       //      } //-------------end unicode traitement--------------
 504       //      else {
 505       if (currentCharacter == testedChar1)
 506         result = 0;
 507       else if (currentCharacter == testedChar2)
 508         result = 1;
 509       else {
 510         currentPosition = temp;
 511         return -1;
 512       }
 513       //        if (withoutUnicodePtr != 0)
 514       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 515       return result;
 516       //     }
 517     } catch (IndexOutOfBoundsException e) {
 518       currentPosition = temp;
 519       return -1;
 520     }
 521   }
 522
 523   public final boolean getNextCharAsDigit() {
 524     //BOOLEAN
 525     //handle the case of unicode.
 526     //when a unicode appears then we must use a buffer that holds char
 527     // internal values
 528     //At the end of this method currentCharacter holds the new visited char
 529     //and currentPosition points right next after it
 530     //Both previous lines are true if the currentCharacter is a digit
 531     //On false, no side effect has occured.
 532     //ALL getNextChar.... ARE OPTIMIZED COPIES
 533     int temp = currentPosition;
 534     try {
 535       currentCharacter = source[currentPosition++];
 536       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 537       //        && (source[currentPosition] == 'u')) {
 538       //        //-------------unicode traitement ------------
 539       //        int c1, c2, c3, c4;
 540       //        int unicodeSize = 6;
 541       //        currentPosition++;
 542       //        while (source[currentPosition] == 'u') {
 543       //          currentPosition++;
 544       //          unicodeSize++;
 545       //        }
 546       //
 547       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 548       //          || c1 < 0)
 549       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 550       //            || c2 < 0)
 551       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 552       //            || c3 < 0)
 553       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 554       //            || c4 < 0)) {
 555       //          currentPosition = temp;
 556       //          return false;
 557       //        }
 558       //
 559       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 560       //        if (!Character.isDigit(currentCharacter)) {
 561       //          currentPosition = temp;
 562       //          return false;
 563       //        }
 564       //
 565       //        //need the unicode buffer
 566       //        if (withoutUnicodePtr == 0) {
 567       //          //buffer all the entries that have been left aside....
 568       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 569       //          System.arraycopy(
 570       //            source,
 571       //            startPosition,
 572       //            withoutUnicodeBuffer,
 573       //            1,
 574       //            withoutUnicodePtr);
 575       //        }
 576       //        //fill the buffer with the char
 577       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 578       //        return true;
 579       //      } //-------------end unicode traitement--------------
 580       //      else {
 581       if (!Character.isDigit(currentCharacter)) {
 582         currentPosition = temp;
 583         return false;
 584       }
 585       //        if (withoutUnicodePtr != 0)
 586       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 587       return true;
 588       //      }
 589     } catch (IndexOutOfBoundsException e) {
 590       currentPosition = temp;
 591       return false;
 592     }
 593   }
 594
 595   public final boolean getNextCharAsDigit(int radix) {
 596     //BOOLEAN
 597     //handle the case of unicode.
 598     //when a unicode appears then we must use a buffer that holds char
 599     // internal values
 600     //At the end of this method currentCharacter holds the new visited char
 601     //and currentPosition points right next after it
 602     //Both previous lines are true if the currentCharacter is a digit base on
 603     // radix
 604     //On false, no side effect has occured.
 605     //ALL getNextChar.... ARE OPTIMIZED COPIES
 606     int temp = currentPosition;
 607     try {
 608       currentCharacter = source[currentPosition++];
 609       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 610       //        && (source[currentPosition] == 'u')) {
 611       //        //-------------unicode traitement ------------
 612       //        int c1, c2, c3, c4;
 613       //        int unicodeSize = 6;
 614       //        currentPosition++;
 615       //        while (source[currentPosition] == 'u') {
 616       //          currentPosition++;
 617       //          unicodeSize++;
 618       //        }
 619       //
 620       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 621       //          || c1 < 0)
 622       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 623       //            || c2 < 0)
 624       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 625       //            || c3 < 0)
 626       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 627       //            || c4 < 0)) {
 628       //          currentPosition = temp;
 629       //          return false;
 630       //        }
 631       //
 632       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 633       //        if (Character.digit(currentCharacter, radix) == -1) {
 634       //          currentPosition = temp;
 635       //          return false;
 636       //        }
 637       //
 638       //        //need the unicode buffer
 639       //        if (withoutUnicodePtr == 0) {
 640       //          //buffer all the entries that have been left aside....
 641       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 642       //          System.arraycopy(
 643       //            source,
 644       //            startPosition,
 645       //            withoutUnicodeBuffer,
 646       //            1,
 647       //            withoutUnicodePtr);
 648       //        }
 649       //        //fill the buffer with the char
 650       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 651       //        return true;
 652       //      } //-------------end unicode traitement--------------
 653       //      else {
 654       if (Character.digit(currentCharacter, radix) == -1) {
 655         currentPosition = temp;
 656         return false;
 657       }
 658       //        if (withoutUnicodePtr != 0)
 659       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 660       return true;
 661       //      }
 662     } catch (IndexOutOfBoundsException e) {
 663       currentPosition = temp;
 664       return false;
 665     }
 666   }
 667
 668   public boolean getNextCharAsJavaIdentifierPart() {
 669     //BOOLEAN
 670     //handle the case of unicode.
 671     //when a unicode appears then we must use a buffer that holds char
 672     // internal values
 673     //At the end of this method currentCharacter holds the new visited char
 674     //and currentPosition points right next after it
 675     //Both previous lines are true if the currentCharacter is a
 676     // JavaIdentifierPart
 677     //On false, no side effect has occured.
 678     //ALL getNextChar.... ARE OPTIMIZED COPIES
 679     int temp = currentPosition;
 680     try {
 681       currentCharacter = source[currentPosition++];
 682       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 683       //        && (source[currentPosition] == 'u')) {
 684       //        //-------------unicode traitement ------------
 685       //        int c1, c2, c3, c4;
 686       //        int unicodeSize = 6;
 687       //        currentPosition++;
 688       //        while (source[currentPosition] == 'u') {
 689       //          currentPosition++;
 690       //          unicodeSize++;
 691       //        }
 692       //
 693       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 694       //          || c1 < 0)
 695       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 696       //            || c2 < 0)
 697       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 698       //            || c3 < 0)
 699       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 700       //            || c4 < 0)) {
 701       //          currentPosition = temp;
 702       //          return false;
 703       //        }
 704       //
 705       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 706       //        if (!isPHPIdentifierPart(currentCharacter)) {
 707       //          currentPosition = temp;
 708       //          return false;
 709       //        }
 710       //
 711       //        //need the unicode buffer
 712       //        if (withoutUnicodePtr == 0) {
 713       //          //buffer all the entries that have been left aside....
 714       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 715       //          System.arraycopy(
 716       //            source,
 717       //            startPosition,
 718       //            withoutUnicodeBuffer,
 719       //            1,
 720       //            withoutUnicodePtr);
 721       //        }
 722       //        //fill the buffer with the char
 723       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 724       //        return true;
 725       //      } //-------------end unicode traitement--------------
 726       //      else {
 727       if (!isPHPIdentifierPart(currentCharacter)) {
 728         currentPosition = temp;
 729         return false;
 730       }
 731       //        if (withoutUnicodePtr != 0)
 732       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 733       return true;
 734       //      }
 735     } catch (IndexOutOfBoundsException e) {
 736       currentPosition = temp;
 737       return false;
 738     }
 739   }
 740
 741   public int getCastOrParen() {
 742     int tempPosition = currentPosition;
 743     char tempCharacter = currentCharacter;
 744     int tempToken = TokenNameLPAREN;
 745     boolean found = false;
 746     StringBuffer buf = new StringBuffer();
 747     try {
 748       do {
 749         currentCharacter = source[currentPosition++];
 750       } while (currentCharacter == ' ' || currentCharacter == '\t');
 751       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 752         buf.append(currentCharacter);
 753         currentCharacter = source[currentPosition++];
 754       }
 755       if (buf.length() >= 3 && buf.length() <= 7) {
 756         char[] data = buf.toString().toCharArray();
 757         int index = 0;
 758         switch (data.length) {
 759         case 3:
 760           // int
 761           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 762             found = true;
 763             tempToken = TokenNameintCAST;
 764           }
 765           break;
 766         case 4:
 767           // bool real
 768           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 769             found = true;
 770             tempToken = TokenNameboolCAST;
 771           } else {
 772             index = 0;
 773             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 774               found = true;
 775               tempToken = TokenNamedoubleCAST;
 776             }
 777           }
 778           break;
 779         case 5:
 780           // array unset float
 781           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 782               && (data[++index] == 'y')) {
 783             found = true;
 784             tempToken = TokenNamearrayCAST;
 785           } else {
 786             index = 0;
 787             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 788                 && (data[++index] == 't')) {
 789               found = true;
 790               tempToken = TokenNameunsetCAST;
 791             } else {
 792               index = 0;
 793               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 794                   && (data[++index] == 't')) {
 795                 found = true;
 796                 tempToken = TokenNamedoubleCAST;
 797               }
 798             }
 799           }
 800           break;
 801         case 6:
 802           // object string double
 803           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 804               && (data[++index] == 'c') && (data[++index] == 't')) {
 805             found = true;
 806             tempToken = TokenNameobjectCAST;
 807           } else {
 808             index = 0;
 809             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 810                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 811               found = true;
 812               tempToken = TokenNamestringCAST;
 813             } else {
 814               index = 0;
 815               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 816                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 817                 found = true;
 818                 tempToken = TokenNamedoubleCAST;
 819               }
 820             }
 821           }
 822           break;
 823         case 7:
 824           // boolean integer
 825           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 826               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 827             found = true;
 828             tempToken = TokenNameboolCAST;
 829           } else {
 830             index = 0;
 831             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 832                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 833               found = true;
 834               tempToken = TokenNameintCAST;
 835             }
 836           }
 837           break;
 838         }
 839         if (found) {
 840           while (currentCharacter == ' ' || currentCharacter == '\t') {
 841             currentCharacter = source[currentPosition++];
 842           }
 843           if (currentCharacter == ')') {
 844             return tempToken;
 845           }
 846         }
 847       }
 848     } catch (IndexOutOfBoundsException e) {
 849     }
 850     currentCharacter = tempCharacter;
 851     currentPosition = tempPosition;
 852     return TokenNameLPAREN;
 853   }
 854
 855   public void consumeStringInterpolated() throws InvalidInputException {
 856     try {
 857       // consume next character
 858       unicodeAsBackSlash = false;
 859       currentCharacter = source[currentPosition++];
 860       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 861       //                  && (source[currentPosition] == 'u')) {
 862       //                  getNextUnicodeChar();
 863       //                } else {
 864       //                  if (withoutUnicodePtr != 0) {
 865       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 866       //                      currentCharacter;
 867       //                  }
 868       //                }
 869       while (currentCharacter != '`') {
 870         /** ** in PHP \r and \n are valid in string literals *** */
 871         //                if ((currentCharacter == '\n')
 872         //                  || (currentCharacter == '\r')) {
 873         //                  // relocate if finding another quote fairly close: thus unicode
 874         // '/u000D' will be fully consumed
 875         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 876         //                    if (currentPosition + lookAhead == source.length)
 877         //                      break;
 878         //                    if (source[currentPosition + lookAhead] == '\n')
 879         //                      break;
 880         //                    if (source[currentPosition + lookAhead] == '\"') {
 881         //                      currentPosition += lookAhead + 1;
 882         //                      break;
 883         //                    }
 884         //                  }
 885         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 886         //                }
 887         if (currentCharacter == '\\') {
 888           int escapeSize = currentPosition;
 889           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 890           //scanEscapeCharacter make a side effect on this value and we need
 891           // the previous value few lines down this one
 892           scanDoubleQuotedEscapeCharacter();
 893           escapeSize = currentPosition - escapeSize;
 894           if (withoutUnicodePtr == 0) {
 895             //buffer all the entries that have been left aside....
 896             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 897             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 898             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 899           } else { //overwrite the / in the buffer
 900             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 901             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 902               // where only one is correct
 903               withoutUnicodePtr--;
 904             }
 905           }
 906         }
 907         // consume next character
 908         unicodeAsBackSlash = false;
 909         currentCharacter = source[currentPosition++];
 910         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 911         //                    && (source[currentPosition] == 'u')) {
 912         //                    getNextUnicodeChar();
 913         //                  } else {
 914         if (withoutUnicodePtr != 0) {
 915           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 916         }
 917         //                  }
 918       }
 919     } catch (IndexOutOfBoundsException e) {
 920       //    reset end position for error reporting
 921       currentPosition -= 2;
 922       throw new InvalidInputException(UNTERMINATED_STRING);
 923     } catch (InvalidInputException e) {
 924       if (e.getMessage().equals(INVALID_ESCAPE)) {
 925         // relocate if finding another quote fairly close: thus unicode
 926         // '/u000D' will be fully consumed
 927         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 928           if (currentPosition + lookAhead == source.length)
 929             break;
 930           if (source[currentPosition + lookAhead] == '\n')
 931             break;
 932           if (source[currentPosition + lookAhead] == '`') {
 933             currentPosition += lookAhead + 1;
 934             break;
 935           }
 936         }
 937       }
 938       throw e; // rethrow
 939     }
 940     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 941       // //$NON-NLS-?$ where ? is an
 942       // int.
 943       if (currentLine == null) {
 944         currentLine = new NLSLine();
 945         lines.add(currentLine);
 946       }
 947       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 948     }
 949   }
 950
 951   public void consumeStringConstant() throws InvalidInputException {
 952     try {
 953       // consume next character
 954       unicodeAsBackSlash = false;
 955       currentCharacter = source[currentPosition++];
 956       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 957       //                  && (source[currentPosition] == 'u')) {
 958       //                  getNextUnicodeChar();
 959       //                } else {
 960       //                  if (withoutUnicodePtr != 0) {
 961       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 962       //                      currentCharacter;
 963       //                  }
 964       //                }
 965       while (currentCharacter != '\'') {
 966         /** ** in PHP \r and \n are valid in string literals *** */
 967         //                  if ((currentCharacter == '\n')
 968         //                    || (currentCharacter == '\r')) {
 969         //                    // relocate if finding another quote fairly close: thus unicode
 970         // '/u000D' will be fully consumed
 971         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 972         //                      if (currentPosition + lookAhead == source.length)
 973         //                        break;
 974         //                      if (source[currentPosition + lookAhead] == '\n')
 975         //                        break;
 976         //                      if (source[currentPosition + lookAhead] == '\"') {
 977         //                        currentPosition += lookAhead + 1;
 978         //                        break;
 979         //                      }
 980         //                    }
 981         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 982         //                  }
 983         if (currentCharacter == '\\') {
 984           int escapeSize = currentPosition;
 985           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 986           //scanEscapeCharacter make a side effect on this value and we need
 987           // the previous value few lines down this one
 988           scanSingleQuotedEscapeCharacter();
 989           escapeSize = currentPosition - escapeSize;
 990           if (withoutUnicodePtr == 0) {
 991             //buffer all the entries that have been left aside....
 992             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 993             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 994             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 995           } else { //overwrite the / in the buffer
 996             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 997             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 998               // where only one is correct
 999               withoutUnicodePtr--;
1000             }
1001           }
1002         }
1003         // consume next character
1004         unicodeAsBackSlash = false;
1005         currentCharacter = source[currentPosition++];
1006         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1007         //                    && (source[currentPosition] == 'u')) {
1008         //                    getNextUnicodeChar();
1009         //                  } else {
1010         if (withoutUnicodePtr != 0) {
1011           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1012         }
1013         //                  }
1014       }
1015     } catch (IndexOutOfBoundsException e) {
1016       // reset end position for error reporting
1017       currentPosition -= 2;
1018       throw new InvalidInputException(UNTERMINATED_STRING);
1019     } catch (InvalidInputException e) {
1020       if (e.getMessage().equals(INVALID_ESCAPE)) {
1021         // relocate if finding another quote fairly close: thus unicode
1022         // '/u000D' will be fully consumed
1023         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1024           if (currentPosition + lookAhead == source.length)
1025             break;
1026           if (source[currentPosition + lookAhead] == '\n')
1027             break;
1028           if (source[currentPosition + lookAhead] == '\'') {
1029             currentPosition += lookAhead + 1;
1030             break;
1031           }
1032         }
1033       }
1034       throw e; // rethrow
1035     }
1036     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1037       // //$NON-NLS-?$ where ? is an
1038       // int.
1039       if (currentLine == null) {
1040         currentLine = new NLSLine();
1041         lines.add(currentLine);
1042       }
1043       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1044     }
1045   }
1046
1047   public void consumeStringLiteral() throws InvalidInputException {
1048     try {
1049       // consume next character
1050       unicodeAsBackSlash = false;
1051       currentCharacter = source[currentPosition++];
1052       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1053       //                  && (source[currentPosition] == 'u')) {
1054       //                  getNextUnicodeChar();
1055       //                } else {
1056       //                  if (withoutUnicodePtr != 0) {
1057       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1058       //                      currentCharacter;
1059       //                  }
1060       //                }
1061       while (currentCharacter != '"') {
1062         /** ** in PHP \r and \n are valid in string literals *** */
1063         //                  if ((currentCharacter == '\n')
1064         //                    || (currentCharacter == '\r')) {
1065         //                    // relocate if finding another quote fairly close: thus unicode
1066         // '/u000D' will be fully consumed
1067         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1068         //                      if (currentPosition + lookAhead == source.length)
1069         //                        break;
1070         //                      if (source[currentPosition + lookAhead] == '\n')
1071         //                        break;
1072         //                      if (source[currentPosition + lookAhead] == '\"') {
1073         //                        currentPosition += lookAhead + 1;
1074         //                        break;
1075         //                      }
1076         //                    }
1077         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1078         //                  }
1079         if (currentCharacter == '\\') {
1080           int escapeSize = currentPosition;
1081           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1082           //scanEscapeCharacter make a side effect on this value and we need
1083           // the previous value few lines down this one
1084           scanDoubleQuotedEscapeCharacter();
1085           escapeSize = currentPosition - escapeSize;
1086           if (withoutUnicodePtr == 0) {
1087             //buffer all the entries that have been left aside....
1088             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1089             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1090             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1091           } else { //overwrite the / in the buffer
1092             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1093             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1094               // where only one is correct
1095               withoutUnicodePtr--;
1096             }
1097           }
1098         }
1099         // consume next character
1100         unicodeAsBackSlash = false;
1101         currentCharacter = source[currentPosition++];
1102         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1103         //                    && (source[currentPosition] == 'u')) {
1104         //                    getNextUnicodeChar();
1105         //                  } else {
1106         if (withoutUnicodePtr != 0) {
1107           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1108         }
1109         //                  }
1110       }
1111     } catch (IndexOutOfBoundsException e) {
1112       //    reset end position for error reporting
1113       currentPosition -= 2;
1114       throw new InvalidInputException(UNTERMINATED_STRING);
1115     } catch (InvalidInputException e) {
1116       if (e.getMessage().equals(INVALID_ESCAPE)) {
1117         // relocate if finding another quote fairly close: thus unicode
1118         // '/u000D' will be fully consumed
1119         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1120           if (currentPosition + lookAhead == source.length)
1121             break;
1122           if (source[currentPosition + lookAhead] == '\n')
1123             break;
1124           if (source[currentPosition + lookAhead] == '\"') {
1125             currentPosition += lookAhead + 1;
1126             break;
1127           }
1128         }
1129       }
1130       throw e; // rethrow
1131     }
1132     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1133       // //$NON-NLS-?$ where ? is an
1134       // int.
1135       if (currentLine == null) {
1136         currentLine = new NLSLine();
1137         lines.add(currentLine);
1138       }
1139       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1140     }
1141   }
1142
1143   public int getNextToken() throws InvalidInputException {
1144     if (!phpMode) {
1145       return getInlinedHTML(currentPosition);
1146     }
1147     if (phpMode) {
1148       this.wasAcr = false;
1149       if (diet) {
1150         jumpOverMethodBody();
1151         diet = false;
1152         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1153       }
1154       try {
1155         while (true) {
1156           withoutUnicodePtr = 0;
1157           //start with a new token
1158           char encapsedChar = ' ';
1159           if (!encapsedStringStack.isEmpty()) {
1160             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1161           }
1162           if (encapsedChar != '$' && encapsedChar != ' ') {
1163             currentCharacter = source[currentPosition++];
1164             if (currentCharacter == encapsedChar) {
1165               switch (currentCharacter) {
1166               case '`':
1167                 return TokenNameEncapsedString0;
1168               case '\'':
1169                 return TokenNameEncapsedString1;
1170               case '"':
1171                 return TokenNameEncapsedString2;
1172               }
1173             }
1174             while (currentCharacter != encapsedChar) {
1175               /** ** in PHP \r and \n are valid in string literals *** */
1176               switch (currentCharacter) {
1177               case '\\':
1178                 int escapeSize = currentPosition;
1179                 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1180                 //scanEscapeCharacter make a side effect on this value and
1181                 // we need the previous value few lines down this one
1182                 scanDoubleQuotedEscapeCharacter();
1183                 escapeSize = currentPosition - escapeSize;
1184                 if (withoutUnicodePtr == 0) {
1185                   //buffer all the entries that have been left aside....
1186                   withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1187                   System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1188                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1189                 } else { //overwrite the / in the buffer
1190                   withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1191                   if (backSlashAsUnicodeInString) { //there are TWO \ in
1192                     withoutUnicodePtr--;
1193                   }
1194                 }
1195                 break;
1196               case '$':
1197                 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1198                   currentPosition--;
1199                   encapsedStringStack.push(new Character('$'));
1200                   return TokenNameSTRING;
1201                 }
1202                 break;
1203               case '{':
1204                 if (source[currentPosition] == '$') { // CURLY_OPEN
1205                   currentPosition--;
1206                   encapsedStringStack.push(new Character('$'));
1207                   return TokenNameSTRING;
1208                 }
1209               }
1210               // consume next character
1211               unicodeAsBackSlash = false;
1212               currentCharacter = source[currentPosition++];
1213               if (withoutUnicodePtr != 0) {
1214                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1215               }
1216               //                  }
1217             } // end while
1218             currentPosition--;
1219             return TokenNameSTRING;
1220           }
1221           // ---------Consume white space and handles startPosition---------
1222           int whiteStart = currentPosition;
1223           startPosition = currentPosition;
1224           currentCharacter = source[currentPosition++];
1225           if (encapsedChar == '$') {
1226             switch (currentCharacter) {
1227             case '\\':
1228               currentCharacter = source[currentPosition++];
1229               return TokenNameSTRING;
1230             case '{':
1231               if (encapsedChar == '$') {
1232                 if (getNextChar('$'))
1233                   return TokenNameLBRACE_DOLLAR;
1234               }
1235               return TokenNameLBRACE;
1236             case '}':
1237               return TokenNameRBRACE;
1238             case '[':
1239               return TokenNameLBRACKET;
1240             case ']':
1241               return TokenNameRBRACKET;
1242             case '\'':
1243               if (tokenizeStrings) {
1244                 consumeStringConstant();
1245                 return TokenNameStringSingleQuote;
1246               }
1247               return TokenNameEncapsedString1;
1248             case '"':
1249               return TokenNameEncapsedString2;
1250             case '`':
1251               if (tokenizeStrings) {
1252                 consumeStringInterpolated();
1253                 return TokenNameStringInterpolated;
1254               }
1255               return TokenNameEncapsedString0;
1256             case '-':
1257               if (getNextChar('>'))
1258                 return TokenNameMINUS_GREATER;
1259               return TokenNameSTRING;
1260             default:
1261               if (currentCharacter == '$') {
1262                 int oldPosition = currentPosition;
1263                 try {
1264                   currentCharacter = source[currentPosition++];
1265                   if (currentCharacter == '{') {
1266                     return TokenNameDOLLAR_LBRACE;
1267                   }
1268                   if (isPHPIdentifierStart(currentCharacter)) {
1269                     return scanIdentifierOrKeyword(true);
1270                   } else {
1271                     currentPosition = oldPosition;
1272                     return TokenNameSTRING;
1273                   }
1274                 } catch (IndexOutOfBoundsException e) {
1275                   currentPosition = oldPosition;
1276                   return TokenNameSTRING;
1277                 }
1278               }
1279               if (isPHPIdentifierStart(currentCharacter))
1280                 return scanIdentifierOrKeyword(false);
1281               if (Character.isDigit(currentCharacter))
1282                 return scanNumber(false);
1283               return TokenNameERROR;
1284             }
1285           }
1286           //          boolean isWhiteSpace;
1287
1288           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1289             startPosition = currentPosition;
1290             currentCharacter = source[currentPosition++];
1291             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1292             //              && (source[currentPosition] == 'u')) {
1293             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1294             //            } else {
1295             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1296               checkNonExternalizeString();
1297               if (recordLineSeparator) {
1298                 pushLineSeparator();
1299               } else {
1300                 currentLine = null;
1301               }
1302             }
1303             //            isWhiteSpace = (currentCharacter == ' ')
1304             //                || Character.isWhitespace(currentCharacter);
1305             //            }
1306           }
1307           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1308             // reposition scanner in case we are interested by spaces as tokens
1309             currentPosition--;
1310             startPosition = whiteStart;
1311             return TokenNameWHITESPACE;
1312           }
1313           //little trick to get out in the middle of a source compuation
1314           if (currentPosition > eofPosition)
1315             return TokenNameEOF;
1316           // ---------Identify the next token-------------
1317           switch (currentCharacter) {
1318           case '(':
1319             return getCastOrParen();
1320           case ')':
1321             return TokenNameRPAREN;
1322           case '{':
1323             return TokenNameLBRACE;
1324           case '}':
1325             return TokenNameRBRACE;
1326           case '[':
1327             return TokenNameLBRACKET;
1328           case ']':
1329             return TokenNameRBRACKET;
1330           case ';':
1331             return TokenNameSEMICOLON;
1332           case ',':
1333             return TokenNameCOMMA;
1334           case '.':
1335             if (getNextChar('='))
1336               return TokenNameDOT_EQUAL;
1337             if (getNextCharAsDigit())
1338               return scanNumber(true);
1339             return TokenNameDOT;
1340           case '+': {
1341             int test;
1342             if ((test = getNextChar('+', '=')) == 0)
1343               return TokenNamePLUS_PLUS;
1344             if (test > 0)
1345               return TokenNamePLUS_EQUAL;
1346             return TokenNamePLUS;
1347           }
1348           case '-': {
1349             int test;
1350             if ((test = getNextChar('-', '=')) == 0)
1351               return TokenNameMINUS_MINUS;
1352             if (test > 0)
1353               return TokenNameMINUS_EQUAL;
1354             if (getNextChar('>'))
1355               return TokenNameMINUS_GREATER;
1356             return TokenNameMINUS;
1357           }
1358           case '~':
1359             if (getNextChar('='))
1360               return TokenNameTWIDDLE_EQUAL;
1361             return TokenNameTWIDDLE;
1362           case '!':
1363             if (getNextChar('=')) {
1364               if (getNextChar('=')) {
1365                 return TokenNameNOT_EQUAL_EQUAL;
1366               }
1367               return TokenNameNOT_EQUAL;
1368             }
1369             return TokenNameNOT;
1370           case '*':
1371             if (getNextChar('='))
1372               return TokenNameMULTIPLY_EQUAL;
1373             return TokenNameMULTIPLY;
1374           case '%':
1375             if (getNextChar('='))
1376               return TokenNameREMAINDER_EQUAL;
1377             return TokenNameREMAINDER;
1378           case '<': {
1379             int oldPosition = currentPosition;
1380             try {
1381               currentCharacter = source[currentPosition++];
1382             } catch (IndexOutOfBoundsException e) {
1383               currentPosition = oldPosition;
1384               return TokenNameLESS;
1385             }
1386             switch (currentCharacter) {
1387             case '=':
1388               return TokenNameLESS_EQUAL;
1389             case '>':
1390               return TokenNameNOT_EQUAL;
1391             case '<':
1392               if (getNextChar('='))
1393                 return TokenNameLEFT_SHIFT_EQUAL;
1394               if (getNextChar('<')) {
1395                 currentCharacter = source[currentPosition++];
1396                 while (Character.isWhitespace(currentCharacter)) {
1397                   currentCharacter = source[currentPosition++];
1398                 }
1399                 int heredocStart = currentPosition - 1;
1400                 int heredocLength = 0;
1401                 if (isPHPIdentifierStart(currentCharacter)) {
1402                   currentCharacter = source[currentPosition++];
1403                 } else {
1404                   return TokenNameERROR;
1405                 }
1406                 while (isPHPIdentifierPart(currentCharacter)) {
1407                   currentCharacter = source[currentPosition++];
1408                 }
1409                 heredocLength = currentPosition - heredocStart - 1;
1410                 // heredoc end-tag determination
1411                 boolean endTag = true;
1412                 char ch;
1413                 do {
1414                   ch = source[currentPosition++];
1415                   if (ch == '\r' || ch == '\n') {
1416                     if (recordLineSeparator) {
1417                       pushLineSeparator();
1418                     } else {
1419                       currentLine = null;
1420                     }
1421                     for (int i = 0; i < heredocLength; i++) {
1422                       if (source[currentPosition + i] != source[heredocStart + i]) {
1423                         endTag = false;
1424                         break;
1425                       }
1426                     }
1427                     if (endTag) {
1428                       currentPosition += heredocLength - 1;
1429                       currentCharacter = source[currentPosition++];
1430                       break; // do...while loop
1431                     } else {
1432                       endTag = true;
1433                     }
1434                   }
1435                 } while (true);
1436                 return TokenNameHEREDOC;
1437               }
1438               return TokenNameLEFT_SHIFT;
1439             }
1440             currentPosition = oldPosition;
1441             return TokenNameLESS;
1442           }
1443           case '>': {
1444             int test;
1445             if ((test = getNextChar('=', '>')) == 0)
1446               return TokenNameGREATER_EQUAL;
1447             if (test > 0) {
1448               if ((test = getNextChar('=', '>')) == 0)
1449                 return TokenNameRIGHT_SHIFT_EQUAL;
1450               return TokenNameRIGHT_SHIFT;
1451             }
1452             return TokenNameGREATER;
1453           }
1454           case '=':
1455             if (getNextChar('=')) {
1456               if (getNextChar('=')) {
1457                 return TokenNameEQUAL_EQUAL_EQUAL;
1458               }
1459               return TokenNameEQUAL_EQUAL;
1460             }
1461             if (getNextChar('>'))
1462               return TokenNameEQUAL_GREATER;
1463             return TokenNameEQUAL;
1464           case '&': {
1465             int test;
1466             if ((test = getNextChar('&', '=')) == 0)
1467               return TokenNameAND_AND;
1468             if (test > 0)
1469               return TokenNameAND_EQUAL;
1470             return TokenNameAND;
1471           }
1472           case '|': {
1473             int test;
1474             if ((test = getNextChar('|', '=')) == 0)
1475               return TokenNameOR_OR;
1476             if (test > 0)
1477               return TokenNameOR_EQUAL;
1478             return TokenNameOR;
1479           }
1480           case '^':
1481             if (getNextChar('='))
1482               return TokenNameXOR_EQUAL;
1483             return TokenNameXOR;
1484           case '?':
1485             if (getNextChar('>')) {
1486               phpMode = false;
1487               if (currentPosition == source.length) {
1488                 phpMode = true;
1489                 return TokenNameINLINE_HTML;
1490               }
1491               return getInlinedHTML(currentPosition - 2);
1492             }
1493             return TokenNameQUESTION;
1494           case ':':
1495             if (getNextChar(':'))
1496               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1497             return TokenNameCOLON;
1498           case '@':
1499             return TokenNameAT;
1500           case '\'':
1501             consumeStringConstant();
1502             return TokenNameStringSingleQuote;
1503           case '"':
1504             if (tokenizeStrings) {
1505               consumeStringLiteral();
1506               return TokenNameStringDoubleQuote;
1507             }
1508             return TokenNameEncapsedString2;
1509           case '`':
1510             if (tokenizeStrings) {
1511               consumeStringInterpolated();
1512               return TokenNameStringInterpolated;
1513             }
1514             return TokenNameEncapsedString0;
1515           case '#':
1516           case '/': {
1517             char startChar = currentCharacter;
1518             if (getNextChar('=')) {
1519               return TokenNameDIVIDE_EQUAL;
1520             }
1521             int test;
1522             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1523               //line comment
1524               this.lastCommentLinePosition = this.currentPosition;
1525               int endPositionForLineComment = 0;
1526               try { //get the next char
1527                 currentCharacter = source[currentPosition++];
1528                 //                    if (((currentCharacter = source[currentPosition++])
1529                 //                      == '\\')
1530                 //                      && (source[currentPosition] == 'u')) {
1531                 //                      //-------------unicode traitement ------------
1532                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1533                 //                      currentPosition++;
1534                 //                      while (source[currentPosition] == 'u') {
1535                 //                        currentPosition++;
1536                 //                      }
1537                 //                      if ((c1 =
1538                 //                        Character.getNumericValue(source[currentPosition++]))
1539                 //                        > 15
1540                 //                        || c1 < 0
1541                 //                        || (c2 =
1542                 //                          Character.getNumericValue(source[currentPosition++]))
1543                 //                          > 15
1544                 //                        || c2 < 0
1545                 //                        || (c3 =
1546                 //                          Character.getNumericValue(source[currentPosition++]))
1547                 //                          > 15
1548                 //                        || c3 < 0
1549                 //                        || (c4 =
1550                 //                          Character.getNumericValue(source[currentPosition++]))
1551                 //                          > 15
1552                 //                        || c4 < 0) {
1553                 //                        throw new
1554                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1555                 //                      } else {
1556                 //                        currentCharacter =
1557                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1558                 //                      }
1559                 //                    }
1560                 //handle the \\u case manually into comment
1561                 //                    if (currentCharacter == '\\') {
1562                 //                      if (source[currentPosition] == '\\')
1563                 //                        currentPosition++;
1564                 //                    } //jump over the \\
1565                 boolean isUnicode = false;
1566                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1567                   this.lastCommentLinePosition = this.currentPosition;
1568                   if (currentCharacter == '?') {
1569                     if (getNextChar('>')) {
1570                       startPosition = currentPosition - 2;
1571                       phpMode = false;
1572                       return TokenNameINLINE_HTML;
1573                     }
1574                   }
1575                   //get the next char
1576                   isUnicode = false;
1577                   currentCharacter = source[currentPosition++];
1578                   //                      if (((currentCharacter = source[currentPosition++])
1579                   //                        == '\\')
1580                   //                        && (source[currentPosition] == 'u')) {
1581                   //                        isUnicode = true;
1582                   //                        //-------------unicode traitement ------------
1583                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1584                   //                        currentPosition++;
1585                   //                        while (source[currentPosition] == 'u') {
1586                   //                          currentPosition++;
1587                   //                        }
1588                   //                        if ((c1 =
1589                   //                          Character.getNumericValue(source[currentPosition++]))
1590                   //                          > 15
1591                   //                          || c1 < 0
1592                   //                          || (c2 =
1593                   //                            Character.getNumericValue(
1594                   //                              source[currentPosition++]))
1595                   //                            > 15
1596                   //                          || c2 < 0
1597                   //                          || (c3 =
1598                   //                            Character.getNumericValue(
1599                   //                              source[currentPosition++]))
1600                   //                            > 15
1601                   //                          || c3 < 0
1602                   //                          || (c4 =
1603                   //                            Character.getNumericValue(
1604                   //                              source[currentPosition++]))
1605                   //                            > 15
1606                   //                          || c4 < 0) {
1607                   //                          throw new
1608                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1609                   //                        } else {
1610                   //                          currentCharacter =
1611                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1612                   //                        }
1613                   //                      }
1614                   //handle the \\u case manually into comment
1615                   //                      if (currentCharacter == '\\') {
1616                   //                        if (source[currentPosition] == '\\')
1617                   //                          currentPosition++;
1618                   //                      } //jump over the \\
1619                 }
1620                 if (isUnicode) {
1621                   endPositionForLineComment = currentPosition - 6;
1622                 } else {
1623                   endPositionForLineComment = currentPosition - 1;
1624                 }
1625                 //                    recordComment(false);
1626                 recordComment(TokenNameCOMMENT_LINE);
1627                 if (this.taskTags != null)
1628                   checkTaskTag(this.startPosition, this.currentPosition);
1629                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1630                   checkNonExternalizeString();
1631                   if (recordLineSeparator) {
1632                     if (isUnicode) {
1633                       pushUnicodeLineSeparator();
1634                     } else {
1635                       pushLineSeparator();
1636                     }
1637                   } else {
1638                     currentLine = null;
1639                   }
1640                 }
1641                 if (tokenizeComments) {
1642                   if (!isUnicode) {
1643                     currentPosition = endPositionForLineComment;
1644                     // reset one character behind
1645                   }
1646                   return TokenNameCOMMENT_LINE;
1647                 }
1648               } catch (IndexOutOfBoundsException e) { //an eof will them
1649                 // be generated
1650                 if (tokenizeComments) {
1651                   currentPosition--;
1652                   // reset one character behind
1653                   return TokenNameCOMMENT_LINE;
1654                 }
1655               }
1656               break;
1657             }
1658             if (test > 0) {
1659               //traditional and annotation comment
1660               boolean isJavadoc = false, star = false;
1661               // consume next character
1662               unicodeAsBackSlash = false;
1663               currentCharacter = source[currentPosition++];
1664               //                  if (((currentCharacter = source[currentPosition++]) ==
1665               // '\\')
1666               //                    && (source[currentPosition] == 'u')) {
1667               //                    getNextUnicodeChar();
1668               //                  } else {
1669               //                    if (withoutUnicodePtr != 0) {
1670               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1671               //                        currentCharacter;
1672               //                    }
1673               //                  }
1674               if (currentCharacter == '*') {
1675                 isJavadoc = true;
1676                 star = true;
1677               }
1678               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1679                 checkNonExternalizeString();
1680                 if (recordLineSeparator) {
1681                   pushLineSeparator();
1682                 } else {
1683                   currentLine = null;
1684                 }
1685               }
1686               try { //get the next char
1687                 currentCharacter = source[currentPosition++];
1688                 //                    if (((currentCharacter = source[currentPosition++])
1689                 //                      == '\\')
1690                 //                      && (source[currentPosition] == 'u')) {
1691                 //                      //-------------unicode traitement ------------
1692                 //                      getNextUnicodeChar();
1693                 //                    }
1694                 //handle the \\u case manually into comment
1695                 //                    if (currentCharacter == '\\') {
1696                 //                      if (source[currentPosition] == '\\')
1697                 //                        currentPosition++;
1698                 //                      //jump over the \\
1699                 //                    }
1700                 // empty comment is not a javadoc /**/
1701                 if (currentCharacter == '/') {
1702                   isJavadoc = false;
1703                 }
1704                 //loop until end of comment */
1705                 while ((currentCharacter != '/') || (!star)) {
1706                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1707                     checkNonExternalizeString();
1708                     if (recordLineSeparator) {
1709                       pushLineSeparator();
1710                     } else {
1711                       currentLine = null;
1712                     }
1713                   }
1714                   star = currentCharacter == '*';
1715                   //get next char
1716                   currentCharacter = source[currentPosition++];
1717                   //                      if (((currentCharacter = source[currentPosition++])
1718                   //                        == '\\')
1719                   //                        && (source[currentPosition] == 'u')) {
1720                   //                        //-------------unicode traitement ------------
1721                   //                        getNextUnicodeChar();
1722                   //                      }
1723                   //handle the \\u case manually into comment
1724                   //                      if (currentCharacter == '\\') {
1725                   //                        if (source[currentPosition] == '\\')
1726                   //                          currentPosition++;
1727                   //                      } //jump over the \\
1728                 }
1729                 //recordComment(isJavadoc);
1730                 if (isJavadoc) {
1731                   recordComment(TokenNameCOMMENT_PHPDOC);
1732                 } else {
1733                   recordComment(TokenNameCOMMENT_BLOCK);
1734                 }
1735
1736                 if (tokenizeComments) {
1737                   if (isJavadoc)
1738                     return TokenNameCOMMENT_PHPDOC;
1739                   return TokenNameCOMMENT_BLOCK;
1740                 }
1741               } catch (IndexOutOfBoundsException e) {
1742                 //                  reset end position for error reporting
1743                 currentPosition -= 2;
1744                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1745               }
1746               break;
1747             }
1748             return TokenNameDIVIDE;
1749           }
1750           case '\u001a':
1751             if (atEnd())
1752               return TokenNameEOF;
1753             //the atEnd may not be <currentPosition == source.length> if
1754             // source is only some part of a real (external) stream
1755             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1756           default:
1757             if (currentCharacter == '$') {
1758               int oldPosition = currentPosition;
1759               try {
1760                 currentCharacter = source[currentPosition++];
1761                 if (isPHPIdentifierStart(currentCharacter)) {
1762                   return scanIdentifierOrKeyword(true);
1763                 } else {
1764                   currentPosition = oldPosition;
1765                   return TokenNameDOLLAR;
1766                 }
1767               } catch (IndexOutOfBoundsException e) {
1768                 currentPosition = oldPosition;
1769                 return TokenNameDOLLAR;
1770               }
1771             }
1772             if (isPHPIdentifierStart(currentCharacter))
1773               return scanIdentifierOrKeyword(false);
1774             if (Character.isDigit(currentCharacter))
1775               return scanNumber(false);
1776             return TokenNameERROR;
1777           }
1778         }
1779       } //-----------------end switch while try--------------------
1780       catch (IndexOutOfBoundsException e) {
1781       }
1782     }
1783     return TokenNameEOF;
1784   }
1785
1786   private int getInlinedHTML(int start) throws InvalidInputException {
1787     int token = getInlinedHTMLToken(start);
1788     if (token == TokenNameINLINE_HTML) {
1789       //                Stack stack = new Stack();
1790       //                // scan html for errors
1791       //                Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1792       //                int lastPHPEndPos=0;
1793       //                for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1794       //                    Tag tag=(Tag)i.next();
1795       //
1796       //                    if (tag instanceof StartTag) {
1797       //                        StartTag startTag=(StartTag)tag;
1798       //                      // System.out.println("startTag: "+tag);
1799       //                        if (startTag.isServerTag()) {
1800       //                          // TODO : what to do with a server tag ?
1801       //                        } else {
1802       //                            // do whatever with HTML start tag
1803       //                            // use startTag.getElement() to find the element corresponding
1804       //                            // to this start tag which may be useful if you implement code
1805       //                            // folding etc
1806       //                                stack.push(startTag);
1807       //                        }
1808       //                    } else {
1809       //                        EndTag endTag=(EndTag)tag;
1810       //                        StartTag stag = (StartTag) stack.peek();
1811       //// System.out.println("endTag: "+tag);
1812       //                        // do whatever with HTML end tag.
1813       //                    }
1814       //                }
1815     }
1816     return token;
1817   }
1818
1819   /**
1820    * @return
1821    * @throws InvalidInputException
1822    */
1823   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1824     //    int htmlPosition = start;
1825     if (currentPosition > source.length) {
1826       currentPosition = source.length;
1827       return TokenNameEOF;
1828     }
1829     startPosition = start;
1830     try {
1831       while (!phpMode) {
1832         currentCharacter = source[currentPosition++];
1833         if (currentCharacter == '<') {
1834           if (getNextChar('?')) {
1835             currentCharacter = source[currentPosition++];
1836             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1837               // <?
1838               if (ignorePHPOneLiner) {
1839                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1840                   phpMode = true;
1841                   return TokenNameINLINE_HTML;
1842                 }
1843               } else {
1844                 phpMode = true;
1845                 return TokenNameINLINE_HTML;
1846               }
1847             } else {
1848               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1849               if (phpStart) {
1850                 int test = getNextChar('H', 'h');
1851                 if (test >= 0) {
1852                   test = getNextChar('P', 'p');
1853                   if (test >= 0) {
1854                     // <?PHP <?php
1855                     if (ignorePHPOneLiner) {
1856                       if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1857                         phpMode = true;
1858                         return TokenNameINLINE_HTML;
1859                       }
1860                     } else {
1861                       phpMode = true;
1862                       return TokenNameINLINE_HTML;
1863                     }
1864                   }
1865                 }
1866               }
1867             }
1868           }
1869         }
1870         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1871           if (recordLineSeparator) {
1872             pushLineSeparator();
1873           } else {
1874             currentLine = null;
1875           }
1876         }
1877       } //-----------------while--------------------
1878       phpMode = true;
1879       return TokenNameINLINE_HTML;
1880     } //-----------------try--------------------
1881     catch (IndexOutOfBoundsException e) {
1882       startPosition = start;
1883       currentPosition--;
1884     }
1885     phpMode = true;
1886     return TokenNameINLINE_HTML;
1887   }
1888
1889   /**
1890    * @return
1891    */
1892   private int lookAheadLinePHPTag() {
1893     // check if the PHP is only in this line (for CodeFormatter)
1894     int currentPositionInLine = currentPosition;
1895     char previousCharInLine = ' ';
1896     char currentCharInLine = ' ';
1897     boolean singleQuotedStringActive = false;
1898     boolean doubleQuotedStringActive = false;
1899
1900     try {
1901       // look ahead in this line
1902       while (true) {
1903         previousCharInLine = currentCharInLine;
1904         currentCharInLine = source[currentPositionInLine++];
1905         switch (currentCharInLine) {
1906         case '>':
1907           if (previousCharInLine == '?') {
1908             // update the scanner's current Position in the source
1909             currentPosition = currentPositionInLine;
1910             // use as "dummy" token
1911             return TokenNameEOF;
1912           }
1913           break;
1914         case '\"':
1915           if (doubleQuotedStringActive) {
1916             if (previousCharInLine != '\\') {
1917               doubleQuotedStringActive = false;
1918             }
1919           } else {
1920             if (!singleQuotedStringActive) {
1921               doubleQuotedStringActive = true;
1922             }
1923           }
1924           break;
1925         case '\'':
1926           if (singleQuotedStringActive) {
1927             if (previousCharInLine != '\\') {
1928               singleQuotedStringActive = false;
1929             }
1930           } else {
1931             if (!doubleQuotedStringActive) {
1932               singleQuotedStringActive = true;
1933             }
1934           }
1935           break;
1936         case '\n':
1937           phpMode = true;
1938           return TokenNameINLINE_HTML;
1939         case '#':
1940           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1941             phpMode = true;
1942             return TokenNameINLINE_HTML;
1943           }
1944           break;
1945         case '/':
1946           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1947             phpMode = true;
1948             return TokenNameINLINE_HTML;
1949           }
1950           break;
1951         case '*':
1952           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1953             phpMode = true;
1954             return TokenNameINLINE_HTML;
1955           }
1956           break;
1957         }
1958       }
1959     } catch (IndexOutOfBoundsException e) {
1960       phpMode = true;
1961       currentPosition = currentPositionInLine;
1962       return TokenNameINLINE_HTML;
1963     }
1964   }
1965
1966   //  public final void getNextUnicodeChar()
1967   //    throws IndexOutOfBoundsException, InvalidInputException {
1968   //    //VOID
1969   //    //handle the case of unicode.
1970   //    //when a unicode appears then we must use a buffer that holds char
1971   // internal values
1972   //    //At the end of this method currentCharacter holds the new visited char
1973   //    //and currentPosition points right next after it
1974   //
1975   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1976   //
1977   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1978   //    currentPosition++;
1979   //    while (source[currentPosition] == 'u') {
1980   //      currentPosition++;
1981   //      unicodeSize++;
1982   //    }
1983   //
1984   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1985   //      || c1 < 0
1986   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1987   //      || c2 < 0
1988   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1989   //      || c3 < 0
1990   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1991   //      || c4 < 0) {
1992   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1993   //    } else {
1994   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1995   //      //need the unicode buffer
1996   //      if (withoutUnicodePtr == 0) {
1997   //        //buffer all the entries that have been left aside....
1998   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1999   //        System.arraycopy(
2000   //          source,
2001   //          startPosition,
2002   //          withoutUnicodeBuffer,
2003   //          1,
2004   //          withoutUnicodePtr);
2005   //      }
2006   //      //fill the buffer with the char
2007   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2008   //    }
2009   //    unicodeAsBackSlash = currentCharacter == '\\';
2010   //  }
2011   /*
2012    * Tokenize a method body, assuming that curly brackets are properly balanced.
2013    */
2014   public final void jumpOverMethodBody() {
2015     this.wasAcr = false;
2016     int found = 1;
2017     try {
2018       while (true) { //loop for jumping over comments
2019         // ---------Consume white space and handles startPosition---------
2020         boolean isWhiteSpace;
2021         do {
2022           startPosition = currentPosition;
2023           currentCharacter = source[currentPosition++];
2024           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2025           //            && (source[currentPosition] == 'u')) {
2026           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2027           //          } else {
2028           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2029             pushLineSeparator();
2030           isWhiteSpace = Character.isWhitespace(currentCharacter);
2031           //          }
2032         } while (isWhiteSpace);
2033         // -------consume token until } is found---------
2034         switch (currentCharacter) {
2035         case '{':
2036           found++;
2037           break;
2038         case '}':
2039           found--;
2040           if (found == 0)
2041             return;
2042           break;
2043         case '\'': {
2044           boolean test;
2045           test = getNextChar('\\');
2046           if (test) {
2047             try {
2048               scanDoubleQuotedEscapeCharacter();
2049             } catch (InvalidInputException ex) {
2050             }
2051             ;
2052           } else {
2053             //                try { // consume next character
2054             unicodeAsBackSlash = false;
2055             currentCharacter = source[currentPosition++];
2056             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2057             //                    && (source[currentPosition] == 'u')) {
2058             //                    getNextUnicodeChar();
2059             //                  } else {
2060             if (withoutUnicodePtr != 0) {
2061               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2062             }
2063             //                  }
2064             //                } catch (InvalidInputException ex) {
2065             //                };
2066           }
2067           getNextChar('\'');
2068           break;
2069         }
2070         case '"':
2071           try {
2072             //              try { // consume next character
2073             unicodeAsBackSlash = false;
2074             currentCharacter = source[currentPosition++];
2075             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2076             //                  && (source[currentPosition] == 'u')) {
2077             //                  getNextUnicodeChar();
2078             //                } else {
2079             if (withoutUnicodePtr != 0) {
2080               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2081             }
2082             //                }
2083             //              } catch (InvalidInputException ex) {
2084             //              };
2085             while (currentCharacter != '"') {
2086               if (currentCharacter == '\r') {
2087                 if (source[currentPosition] == '\n')
2088                   currentPosition++;
2089                 break;
2090                 // the string cannot go further that the line
2091               }
2092               if (currentCharacter == '\n') {
2093                 break;
2094                 // the string cannot go further that the line
2095               }
2096               if (currentCharacter == '\\') {
2097                 try {
2098                   scanDoubleQuotedEscapeCharacter();
2099                 } catch (InvalidInputException ex) {
2100                 }
2101                 ;
2102               }
2103               //                try { // consume next character
2104               unicodeAsBackSlash = false;
2105               currentCharacter = source[currentPosition++];
2106               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2107               //                    && (source[currentPosition] == 'u')) {
2108               //                    getNextUnicodeChar();
2109               //                  } else {
2110               if (withoutUnicodePtr != 0) {
2111                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2112               }
2113               //                  }
2114               //                } catch (InvalidInputException ex) {
2115               //                };
2116             }
2117           } catch (IndexOutOfBoundsException e) {
2118             return;
2119           }
2120           break;
2121         case '/': {
2122           int test;
2123           if ((test = getNextChar('/', '*')) == 0) {
2124             //line comment
2125             try {
2126               //get the next char
2127               currentCharacter = source[currentPosition++];
2128               //                  if (((currentCharacter = source[currentPosition++]) ==
2129               // '\\')
2130               //                    && (source[currentPosition] == 'u')) {
2131               //                    //-------------unicode traitement ------------
2132               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2133               //                    currentPosition++;
2134               //                    while (source[currentPosition] == 'u') {
2135               //                      currentPosition++;
2136               //                    }
2137               //                    if ((c1 =
2138               //                      Character.getNumericValue(source[currentPosition++]))
2139               //                      > 15
2140               //                      || c1 < 0
2141               //                      || (c2 =
2142               //                        Character.getNumericValue(source[currentPosition++]))
2143               //                        > 15
2144               //                      || c2 < 0
2145               //                      || (c3 =
2146               //                        Character.getNumericValue(source[currentPosition++]))
2147               //                        > 15
2148               //                      || c3 < 0
2149               //                      || (c4 =
2150               //                        Character.getNumericValue(source[currentPosition++]))
2151               //                        > 15
2152               //                      || c4 < 0) {
2153               //                      //error don't care of the value
2154               //                      currentCharacter = 'A';
2155               //                    } //something different from \n and \r
2156               //                    else {
2157               //                      currentCharacter =
2158               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2159               //                    }
2160               //                  }
2161               while (currentCharacter != '\r' && currentCharacter != '\n') {
2162                 //get the next char
2163                 currentCharacter = source[currentPosition++];
2164                 //                    if (((currentCharacter = source[currentPosition++])
2165                 //                      == '\\')
2166                 //                      && (source[currentPosition] == 'u')) {
2167                 //                      //-------------unicode traitement ------------
2168                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2169                 //                      currentPosition++;
2170                 //                      while (source[currentPosition] == 'u') {
2171                 //                        currentPosition++;
2172                 //                      }
2173                 //                      if ((c1 =
2174                 //                        Character.getNumericValue(source[currentPosition++]))
2175                 //                        > 15
2176                 //                        || c1 < 0
2177                 //                        || (c2 =
2178                 //                          Character.getNumericValue(source[currentPosition++]))
2179                 //                          > 15
2180                 //                        || c2 < 0
2181                 //                        || (c3 =
2182                 //                          Character.getNumericValue(source[currentPosition++]))
2183                 //                          > 15
2184                 //                        || c3 < 0
2185                 //                        || (c4 =
2186                 //                          Character.getNumericValue(source[currentPosition++]))
2187                 //                          > 15
2188                 //                        || c4 < 0) {
2189                 //                        //error don't care of the value
2190                 //                        currentCharacter = 'A';
2191                 //                      } //something different from \n and \r
2192                 //                      else {
2193                 //                        currentCharacter =
2194                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2195                 //                      }
2196                 //                    }
2197               }
2198               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2199                 pushLineSeparator();
2200             } catch (IndexOutOfBoundsException e) {
2201             } //an eof will them be generated
2202             break;
2203           }
2204           if (test > 0) {
2205             //traditional and annotation comment
2206             boolean star = false;
2207             //                try { // consume next character
2208             unicodeAsBackSlash = false;
2209             currentCharacter = source[currentPosition++];
2210             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2211             //                    && (source[currentPosition] == 'u')) {
2212             //                    getNextUnicodeChar();
2213             //                  } else {
2214             if (withoutUnicodePtr != 0) {
2215               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2216             }
2217             //                  };
2218             //                } catch (InvalidInputException ex) {
2219             //                };
2220             if (currentCharacter == '*') {
2221               star = true;
2222             }
2223             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2224               pushLineSeparator();
2225             try { //get the next char
2226               currentCharacter = source[currentPosition++];
2227               //                  if (((currentCharacter = source[currentPosition++]) ==
2228               // '\\')
2229               //                    && (source[currentPosition] == 'u')) {
2230               //                    //-------------unicode traitement ------------
2231               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2232               //                    currentPosition++;
2233               //                    while (source[currentPosition] == 'u') {
2234               //                      currentPosition++;
2235               //                    }
2236               //                    if ((c1 =
2237               //                      Character.getNumericValue(source[currentPosition++]))
2238               //                      > 15
2239               //                      || c1 < 0
2240               //                      || (c2 =
2241               //                        Character.getNumericValue(source[currentPosition++]))
2242               //                        > 15
2243               //                      || c2 < 0
2244               //                      || (c3 =
2245               //                        Character.getNumericValue(source[currentPosition++]))
2246               //                        > 15
2247               //                      || c3 < 0
2248               //                      || (c4 =
2249               //                        Character.getNumericValue(source[currentPosition++]))
2250               //                        > 15
2251               //                      || c4 < 0) {
2252               //                      //error don't care of the value
2253               //                      currentCharacter = 'A';
2254               //                    } //something different from * and /
2255               //                    else {
2256               //                      currentCharacter =
2257               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2258               //                    }
2259               //                  }
2260               //loop until end of comment */
2261               while ((currentCharacter != '/') || (!star)) {
2262                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2263                   pushLineSeparator();
2264                 star = currentCharacter == '*';
2265                 //get next char
2266                 currentCharacter = source[currentPosition++];
2267                 //                    if (((currentCharacter = source[currentPosition++])
2268                 //                      == '\\')
2269                 //                      && (source[currentPosition] == 'u')) {
2270                 //                      //-------------unicode traitement ------------
2271                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2272                 //                      currentPosition++;
2273                 //                      while (source[currentPosition] == 'u') {
2274                 //                        currentPosition++;
2275                 //                      }
2276                 //                      if ((c1 =
2277                 //                        Character.getNumericValue(source[currentPosition++]))
2278                 //                        > 15
2279                 //                        || c1 < 0
2280                 //                        || (c2 =
2281                 //                          Character.getNumericValue(source[currentPosition++]))
2282                 //                          > 15
2283                 //                        || c2 < 0
2284                 //                        || (c3 =
2285                 //                          Character.getNumericValue(source[currentPosition++]))
2286                 //                          > 15
2287                 //                        || c3 < 0
2288                 //                        || (c4 =
2289                 //                          Character.getNumericValue(source[currentPosition++]))
2290                 //                          > 15
2291                 //                        || c4 < 0) {
2292                 //                        //error don't care of the value
2293                 //                        currentCharacter = 'A';
2294                 //                      } //something different from * and /
2295                 //                      else {
2296                 //                        currentCharacter =
2297                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2298                 //                      }
2299                 //                    }
2300               }
2301             } catch (IndexOutOfBoundsException e) {
2302               return;
2303             }
2304             break;
2305           }
2306           break;
2307         }
2308         default:
2309           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2310             try {
2311               scanIdentifierOrKeyword((currentCharacter == '$'));
2312             } catch (InvalidInputException ex) {
2313             }
2314             ;
2315             break;
2316           }
2317           if (Character.isDigit(currentCharacter)) {
2318             try {
2319               scanNumber(false);
2320             } catch (InvalidInputException ex) {
2321             }
2322             ;
2323             break;
2324           }
2325         }
2326       }
2327       //-----------------end switch while try--------------------
2328     } catch (IndexOutOfBoundsException e) {
2329     } catch (InvalidInputException e) {
2330     }
2331     return;
2332   }
2333
2334   //  public final boolean jumpOverUnicodeWhiteSpace()
2335   //    throws InvalidInputException {
2336   //    //BOOLEAN
2337   //    //handle the case of unicode. Jump over the next whiteSpace
2338   //    //making startPosition pointing on the next available char
2339   //    //On false, the currentCharacter is filled up with a potential
2340   //    //correct char
2341   //
2342   //    try {
2343   //      this.wasAcr = false;
2344   //      int c1, c2, c3, c4;
2345   //      int unicodeSize = 6;
2346   //      currentPosition++;
2347   //      while (source[currentPosition] == 'u') {
2348   //        currentPosition++;
2349   //        unicodeSize++;
2350   //      }
2351   //
2352   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2353   //        || c1 < 0)
2354   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2355   //          || c2 < 0)
2356   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2357   //          || c3 < 0)
2358   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2359   //          || c4 < 0)) {
2360   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2361   //      }
2362   //
2363   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2364   //      if (recordLineSeparator
2365   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2366   //        pushLineSeparator();
2367   //      if (Character.isWhitespace(currentCharacter))
2368   //        return true;
2369   //
2370   //      //buffer the new char which is not a white space
2371   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2372   //      //withoutUnicodePtr == 1 is true here
2373   //      return false;
2374   //    } catch (IndexOutOfBoundsException e) {
2375   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2376   //    }
2377   //  }
2378   public final int[] getLineEnds() {
2379     //return a bounded copy of this.lineEnds
2380     int[] copy;
2381     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2382     return copy;
2383   }
2384
2385   public char[] getSource() {
2386     return this.source;
2387   }
2388
2389   public static boolean isIdentifierOrKeyword(int token) {
2390     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2391   }
2392
2393   final char[] optimizedCurrentTokenSource1() {
2394     //return always the same char[] build only once
2395     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2396     char charOne = source[startPosition];
2397     switch (charOne) {
2398     case 'a':
2399       return charArray_a;
2400     case 'b':
2401       return charArray_b;
2402     case 'c':
2403       return charArray_c;
2404     case 'd':
2405       return charArray_d;
2406     case 'e':
2407       return charArray_e;
2408     case 'f':
2409       return charArray_f;
2410     case 'g':
2411       return charArray_g;
2412     case 'h':
2413       return charArray_h;
2414     case 'i':
2415       return charArray_i;
2416     case 'j':
2417       return charArray_j;
2418     case 'k':
2419       return charArray_k;
2420     case 'l':
2421       return charArray_l;
2422     case 'm':
2423       return charArray_m;
2424     case 'n':
2425       return charArray_n;
2426     case 'o':
2427       return charArray_o;
2428     case 'p':
2429       return charArray_p;
2430     case 'q':
2431       return charArray_q;
2432     case 'r':
2433       return charArray_r;
2434     case 's':
2435       return charArray_s;
2436     case 't':
2437       return charArray_t;
2438     case 'u':
2439       return charArray_u;
2440     case 'v':
2441       return charArray_v;
2442     case 'w':
2443       return charArray_w;
2444     case 'x':
2445       return charArray_x;
2446     case 'y':
2447       return charArray_y;
2448     case 'z':
2449       return charArray_z;
2450     default:
2451       return new char[] { charOne };
2452     }
2453   }
2454
2455   final char[] optimizedCurrentTokenSource2() {
2456     //try to return the same char[] build only once
2457     char c0, c1;
2458     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2459     char[][] table = charArray_length[0][hash];
2460     int i = newEntry2;
2461     while (++i < InternalTableSize) {
2462       char[] charArray = table[i];
2463       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2464         return charArray;
2465     }
2466     //---------other side---------
2467     i = -1;
2468     int max = newEntry2;
2469     while (++i <= max) {
2470       char[] charArray = table[i];
2471       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2472         return charArray;
2473     }
2474     //--------add the entry-------
2475     if (++max >= InternalTableSize)
2476       max = 0;
2477     char[] r;
2478     table[max] = (r = new char[] { c0, c1 });
2479     newEntry2 = max;
2480     return r;
2481   }
2482
2483   final char[] optimizedCurrentTokenSource3() {
2484     //try to return the same char[] build only once
2485     char c0, c1, c2;
2486     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2487         % TableSize;
2488     char[][] table = charArray_length[1][hash];
2489     int i = newEntry3;
2490     while (++i < InternalTableSize) {
2491       char[] charArray = table[i];
2492       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2493         return charArray;
2494     }
2495     //---------other side---------
2496     i = -1;
2497     int max = newEntry3;
2498     while (++i <= max) {
2499       char[] charArray = table[i];
2500       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2501         return charArray;
2502     }
2503     //--------add the entry-------
2504     if (++max >= InternalTableSize)
2505       max = 0;
2506     char[] r;
2507     table[max] = (r = new char[] { c0, c1, c2 });
2508     newEntry3 = max;
2509     return r;
2510   }
2511
2512   final char[] optimizedCurrentTokenSource4() {
2513     //try to return the same char[] build only once
2514     char c0, c1, c2, c3;
2515     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2516         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2517         % TableSize;
2518     char[][] table = charArray_length[2][(int) hash];
2519     int i = newEntry4;
2520     while (++i < InternalTableSize) {
2521       char[] charArray = table[i];
2522       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2523         return charArray;
2524     }
2525     //---------other side---------
2526     i = -1;
2527     int max = newEntry4;
2528     while (++i <= max) {
2529       char[] charArray = table[i];
2530       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2531         return charArray;
2532     }
2533     //--------add the entry-------
2534     if (++max >= InternalTableSize)
2535       max = 0;
2536     char[] r;
2537     table[max] = (r = new char[] { c0, c1, c2, c3 });
2538     newEntry4 = max;
2539     return r;
2540   }
2541
2542   final char[] optimizedCurrentTokenSource5() {
2543     //try to return the same char[] build only once
2544     char c0, c1, c2, c3, c4;
2545     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2546         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2547         % TableSize;
2548     char[][] table = charArray_length[3][(int) hash];
2549     int i = newEntry5;
2550     while (++i < InternalTableSize) {
2551       char[] charArray = table[i];
2552       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2553         return charArray;
2554     }
2555     //---------other side---------
2556     i = -1;
2557     int max = newEntry5;
2558     while (++i <= max) {
2559       char[] charArray = table[i];
2560       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2561         return charArray;
2562     }
2563     //--------add the entry-------
2564     if (++max >= InternalTableSize)
2565       max = 0;
2566     char[] r;
2567     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2568     newEntry5 = max;
2569     return r;
2570   }
2571
2572   final char[] optimizedCurrentTokenSource6() {
2573     //try to return the same char[] build only once
2574     char c0, c1, c2, c3, c4, c5;
2575     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2576         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2577         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2578         % TableSize;
2579     char[][] table = charArray_length[4][(int) hash];
2580     int i = newEntry6;
2581     while (++i < InternalTableSize) {
2582       char[] charArray = table[i];
2583       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2584           && (c5 == charArray[5]))
2585         return charArray;
2586     }
2587     //---------other side---------
2588     i = -1;
2589     int max = newEntry6;
2590     while (++i <= max) {
2591       char[] charArray = table[i];
2592       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2593           && (c5 == charArray[5]))
2594         return charArray;
2595     }
2596     //--------add the entry-------
2597     if (++max >= InternalTableSize)
2598       max = 0;
2599     char[] r;
2600     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2601     newEntry6 = max;
2602     return r;
2603   }
2604
2605   public final void pushLineSeparator() throws InvalidInputException {
2606     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2607     final int INCREMENT = 250;
2608     if (this.checkNonExternalizedStringLiterals) {
2609       // reinitialize the current line for non externalize strings purpose
2610       currentLine = null;
2611     }
2612     //currentCharacter is at position currentPosition-1
2613     // cr 000D
2614     if (currentCharacter == '\r') {
2615       int separatorPos = currentPosition - 1;
2616       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2617         return;
2618       //System.out.println("CR-" + separatorPos);
2619       try {
2620         lineEnds[++linePtr] = separatorPos;
2621       } catch (IndexOutOfBoundsException e) {
2622         //linePtr value is correct
2623         int oldLength = lineEnds.length;
2624         int[] old = lineEnds;
2625         lineEnds = new int[oldLength + INCREMENT];
2626         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2627         lineEnds[linePtr] = separatorPos;
2628       }
2629       // look-ahead for merged cr+lf
2630       try {
2631         if (source[currentPosition] == '\n') {
2632           //System.out.println("look-ahead LF-" + currentPosition);
2633           lineEnds[linePtr] = currentPosition;
2634           currentPosition++;
2635           wasAcr = false;
2636         } else {
2637           wasAcr = true;
2638         }
2639       } catch (IndexOutOfBoundsException e) {
2640         wasAcr = true;
2641       }
2642     } else {
2643       // lf 000A
2644       if (currentCharacter == '\n') {
2645         //must merge eventual cr followed by lf
2646         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2647           //System.out.println("merge LF-" + (currentPosition - 1));
2648           lineEnds[linePtr] = currentPosition - 1;
2649         } else {
2650           int separatorPos = currentPosition - 1;
2651           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2652             return;
2653           // System.out.println("LF-" + separatorPos);
2654           try {
2655             lineEnds[++linePtr] = separatorPos;
2656           } catch (IndexOutOfBoundsException e) {
2657             //linePtr value is correct
2658             int oldLength = lineEnds.length;
2659             int[] old = lineEnds;
2660             lineEnds = new int[oldLength + INCREMENT];
2661             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2662             lineEnds[linePtr] = separatorPos;
2663           }
2664         }
2665         wasAcr = false;
2666       }
2667     }
2668   }
2669
2670   public final void pushUnicodeLineSeparator() {
2671     // isUnicode means that the \r or \n has been read as a unicode character
2672     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2673     final int INCREMENT = 250;
2674     //currentCharacter is at position currentPosition-1
2675     if (this.checkNonExternalizedStringLiterals) {
2676       // reinitialize the current line for non externalize strings purpose
2677       currentLine = null;
2678     }
2679     // cr 000D
2680     if (currentCharacter == '\r') {
2681       int separatorPos = currentPosition - 6;
2682       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2683         return;
2684       //System.out.println("CR-" + separatorPos);
2685       try {
2686         lineEnds[++linePtr] = separatorPos;
2687       } catch (IndexOutOfBoundsException e) {
2688         //linePtr value is correct
2689         int oldLength = lineEnds.length;
2690         int[] old = lineEnds;
2691         lineEnds = new int[oldLength + INCREMENT];
2692         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2693         lineEnds[linePtr] = separatorPos;
2694       }
2695       // look-ahead for merged cr+lf
2696       if (source[currentPosition] == '\n') {
2697         //System.out.println("look-ahead LF-" + currentPosition);
2698         lineEnds[linePtr] = currentPosition;
2699         currentPosition++;
2700         wasAcr = false;
2701       } else {
2702         wasAcr = true;
2703       }
2704     } else {
2705       // lf 000A
2706       if (currentCharacter == '\n') {
2707         //must merge eventual cr followed by lf
2708         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2709           //System.out.println("merge LF-" + (currentPosition - 1));
2710           lineEnds[linePtr] = currentPosition - 6;
2711         } else {
2712           int separatorPos = currentPosition - 6;
2713           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2714             return;
2715           // System.out.println("LF-" + separatorPos);
2716           try {
2717             lineEnds[++linePtr] = separatorPos;
2718           } catch (IndexOutOfBoundsException e) {
2719             //linePtr value is correct
2720             int oldLength = lineEnds.length;
2721             int[] old = lineEnds;
2722             lineEnds = new int[oldLength + INCREMENT];
2723             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2724             lineEnds[linePtr] = separatorPos;
2725           }
2726         }
2727         wasAcr = false;
2728       }
2729     }
2730   }
2731
2732   public void recordComment(int token) {
2733     // compute position
2734     int stopPosition = this.currentPosition;
2735     switch (token) {
2736     case TokenNameCOMMENT_LINE:
2737       stopPosition = -this.lastCommentLinePosition;
2738       break;
2739     case TokenNameCOMMENT_BLOCK:
2740       stopPosition = -this.currentPosition;
2741       break;
2742     }
2743
2744     // a new comment is recorded
2745     int length = this.commentStops.length;
2746     if (++this.commentPtr >= length) {
2747       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2748       //grows the positions buffers too
2749       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2750     }
2751     this.commentStops[this.commentPtr] = stopPosition;
2752     this.commentStarts[this.commentPtr] = this.startPosition;
2753   }
2754
2755   //  public final void recordComment(boolean isJavadoc) {
2756   //    // a new annotation comment is recorded
2757   //    try {
2758   //      commentStops[++commentPtr] = isJavadoc
2759   //          ? currentPosition
2760   //          : -currentPosition;
2761   //    } catch (IndexOutOfBoundsException e) {
2762   //      int oldStackLength = commentStops.length;
2763   //      int[] oldStack = commentStops;
2764   //      commentStops = new int[oldStackLength + 30];
2765   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2766   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2767   //      //grows the positions buffers too
2768   //      int[] old = commentStarts;
2769   //      commentStarts = new int[oldStackLength + 30];
2770   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2771   //    }
2772   //    //the buffer is of a correct size here
2773   //    commentStarts[commentPtr] = startPosition;
2774   //  }
2775   public void resetTo(int begin, int end) {
2776     //reset the scanner to a given position where it may rescan again
2777     diet = false;
2778     initialPosition = startPosition = currentPosition = begin;
2779     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2780     commentPtr = -1; // reset comment stack
2781   }
2782
2783   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2784     // the string with "\\u" is a legal string of two chars \ and u
2785     //thus we use a direct access to the source (for regular cases).
2786     //    if (unicodeAsBackSlash) {
2787     //      // consume next character
2788     //      unicodeAsBackSlash = false;
2789     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2790     //        && (source[currentPosition] == 'u')) {
2791     //        getNextUnicodeChar();
2792     //      } else {
2793     //        if (withoutUnicodePtr != 0) {
2794     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2795     //        }
2796     //      }
2797     //    } else
2798     currentCharacter = source[currentPosition++];
2799     switch (currentCharacter) {
2800     case '\'':
2801       currentCharacter = '\'';
2802       break;
2803     case '\\':
2804       currentCharacter = '\\';
2805       break;
2806     default:
2807       currentCharacter = '\\';
2808       currentPosition--;
2809     }
2810   }
2811
2812   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2813     // the string with "\\u" is a legal string of two chars \ and u
2814     //thus we use a direct access to the source (for regular cases).
2815     //    if (unicodeAsBackSlash) {
2816     //      // consume next character
2817     //      unicodeAsBackSlash = false;
2818     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2819     //        && (source[currentPosition] == 'u')) {
2820     //        getNextUnicodeChar();
2821     //      } else {
2822     //        if (withoutUnicodePtr != 0) {
2823     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2824     //        }
2825     //      }
2826     //    } else
2827     currentCharacter = source[currentPosition++];
2828     switch (currentCharacter) {
2829     //      case 'b' :
2830     //        currentCharacter = '\b';
2831     //        break;
2832     case 't':
2833       currentCharacter = '\t';
2834       break;
2835     case 'n':
2836       currentCharacter = '\n';
2837       break;
2838     //      case 'f' :
2839     //        currentCharacter = '\f';
2840     //        break;
2841     case 'r':
2842       currentCharacter = '\r';
2843       break;
2844     case '\"':
2845       currentCharacter = '\"';
2846       break;
2847     case '\'':
2848       currentCharacter = '\'';
2849       break;
2850     case '\\':
2851       currentCharacter = '\\';
2852       break;
2853     case '$':
2854       currentCharacter = '$';
2855       break;
2856     default:
2857       // -----------octal escape--------------
2858       // OctalDigit
2859       // OctalDigit OctalDigit
2860       // ZeroToThree OctalDigit OctalDigit
2861       int number = Character.getNumericValue(currentCharacter);
2862       if (number >= 0 && number <= 7) {
2863         boolean zeroToThreeNot = number > 3;
2864         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2865           int digit = Character.getNumericValue(currentCharacter);
2866           if (digit >= 0 && digit <= 7) {
2867             number = (number * 8) + digit;
2868             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2869               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2870                 // Digit --> ignore last character
2871                 currentPosition--;
2872               } else {
2873                 digit = Character.getNumericValue(currentCharacter);
2874                 if (digit >= 0 && digit <= 7) {
2875                   // has read \ZeroToThree OctalDigit OctalDigit
2876                   number = (number * 8) + digit;
2877                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2878                   // --> ignore last character
2879                   currentPosition--;
2880                 }
2881               }
2882             } else { // has read \OctalDigit NonDigit--> ignore last
2883               // character
2884               currentPosition--;
2885             }
2886           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2887             // character
2888             currentPosition--;
2889           }
2890         } else { // has read \OctalDigit --> ignore last character
2891           currentPosition--;
2892         }
2893         if (number > 255)
2894           throw new InvalidInputException(INVALID_ESCAPE);
2895         currentCharacter = (char) number;
2896       }
2897     //else
2898     //     throw new InvalidInputException(INVALID_ESCAPE);
2899     }
2900   }
2901
2902   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2903   //    return scanIdentifierOrKeyword( false );
2904   //  }
2905   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2906     //test keywords
2907     //first dispatch on the first char.
2908     //then the length. If there are several
2909     //keywors with the same length AND the same first char, then do another
2910     //disptach on the second char :-)...cool....but fast !
2911     useAssertAsAnIndentifier = false;
2912     while (getNextCharAsJavaIdentifierPart()) {
2913     }
2914     ;
2915     if (isVariable) {
2916       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2917       //        return TokenNamethis;
2918       //      }
2919       return TokenNameVariable;
2920     }
2921     int index, length;
2922     char[] data;
2923     char firstLetter;
2924     //    if (withoutUnicodePtr == 0)
2925     //quick test on length == 1 but not on length > 12 while most identifier
2926     //have a length which is <= 12...but there are lots of identifier with
2927     //only one char....
2928     //      {
2929     if ((length = currentPosition - startPosition) == 1)
2930       return TokenNameIdentifier;
2931     //  data = source;
2932     data = new char[length];
2933     index = startPosition;
2934     for (int i = 0; i < length; i++) {
2935       data[i] = Character.toLowerCase(source[index + i]);
2936     }
2937     index = 0;
2938     //    } else {
2939     //      if ((length = withoutUnicodePtr) == 1)
2940     //        return TokenNameIdentifier;
2941     //      // data = withoutUnicodeBuffer;
2942     //      data = new char[withoutUnicodeBuffer.length];
2943     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2944     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2945     //      }
2946     //      index = 1;
2947     //    }
2948     firstLetter = data[index];
2949     switch (firstLetter) {
2950     case '_':
2951       switch (length) {
2952       case 8:
2953         //__FILE__
2954         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2955             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2956           return TokenNameFILE;
2957         index = 0; //__LINE__
2958         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2959             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2960           return TokenNameLINE;
2961         break;
2962       case 9:
2963         //__CLASS__
2964         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2965             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2966           return TokenNameCLASS_C;
2967         break;
2968       case 11:
2969         //__METHOD__
2970         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2971             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2972             && (data[++index] == '_'))
2973           return TokenNameMETHOD_C;
2974         break;
2975       case 12:
2976         //__FUNCTION__
2977         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2978             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
2979             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
2980           return TokenNameFUNC_C;
2981         break;
2982       }
2983       return TokenNameIdentifier;
2984     case 'a':
2985       // as and array abstract
2986       switch (length) {
2987       case 2:
2988         //as
2989         if ((data[++index] == 's')) {
2990           return TokenNameas;
2991         } else {
2992           return TokenNameIdentifier;
2993         }
2994       case 3:
2995         //and
2996         if ((data[++index] == 'n') && (data[++index] == 'd')) {
2997           return TokenNameand;
2998         } else {
2999           return TokenNameIdentifier;
3000         }
3001       case 5:
3002         // array
3003         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3004           return TokenNamearray;
3005         else
3006           return TokenNameIdentifier;
3007       case 8:
3008         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3009             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3010           return TokenNameabstract;
3011         else
3012           return TokenNameIdentifier;
3013       default:
3014         return TokenNameIdentifier;
3015       }
3016     case 'b':
3017       //break
3018       switch (length) {
3019       case 5:
3020         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3021           return TokenNamebreak;
3022         else
3023           return TokenNameIdentifier;
3024       default:
3025         return TokenNameIdentifier;
3026       }
3027     case 'c':
3028       //case catch class clone const continue
3029       switch (length) {
3030       case 4:
3031         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3032           return TokenNamecase;
3033         else
3034           return TokenNameIdentifier;
3035       case 5:
3036         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3037           return TokenNamecatch;
3038         index = 0;
3039         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3040           return TokenNameclass;
3041         index = 0;
3042         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3043           return TokenNameclone;
3044         index = 0;
3045         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3046           return TokenNameconst;
3047         else
3048           return TokenNameIdentifier;
3049       case 8:
3050         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3051             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3052           return TokenNamecontinue;
3053         else
3054           return TokenNameIdentifier;
3055       default:
3056         return TokenNameIdentifier;
3057       }
3058     case 'd':
3059       // declare default do die
3060       // TODO delete define ==> no keyword !
3061       switch (length) {
3062       case 2:
3063         if ((data[++index] == 'o'))
3064           return TokenNamedo;
3065         else
3066           return TokenNameIdentifier;
3067       //          case 6 :
3068       //            if ((data[++index] == 'e')
3069       //              && (data[++index] == 'f')
3070       //              && (data[++index] == 'i')
3071       //              && (data[++index] == 'n')
3072       //              && (data[++index] == 'e'))
3073       //              return TokenNamedefine;
3074       //            else
3075       //              return TokenNameIdentifier;
3076       case 7:
3077         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3078             && (data[++index] == 'r') && (data[++index] == 'e'))
3079           return TokenNamedeclare;
3080         index = 0;
3081         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3082             && (data[++index] == 'l') && (data[++index] == 't'))
3083           return TokenNamedefault;
3084         else
3085           return TokenNameIdentifier;
3086       default:
3087         return TokenNameIdentifier;
3088       }
3089     case 'e':
3090       //echo else exit elseif extends eval
3091       switch (length) {
3092       case 4:
3093         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3094           return TokenNameecho;
3095         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3096           return TokenNameelse;
3097         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3098           return TokenNameexit;
3099         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3100           return TokenNameeval;
3101         else
3102           return TokenNameIdentifier;
3103       case 5:
3104         // endif empty
3105         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3106           return TokenNameendif;
3107         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3108           return TokenNameempty;
3109         else
3110           return TokenNameIdentifier;
3111       case 6:
3112         // endfor
3113         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3114             && (data[++index] == 'r'))
3115           return TokenNameendfor;
3116         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3117             && (data[++index] == 'f'))
3118           return TokenNameelseif;
3119         else
3120           return TokenNameIdentifier;
3121       case 7:
3122         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3123             && (data[++index] == 'd') && (data[++index] == 's'))
3124           return TokenNameextends;
3125         else
3126           return TokenNameIdentifier;
3127       case 8:
3128         // endwhile
3129         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3130             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3131           return TokenNameendwhile;
3132         else
3133           return TokenNameIdentifier;
3134       case 9:
3135         // endswitch
3136         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3137             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3138           return TokenNameendswitch;
3139         else
3140           return TokenNameIdentifier;
3141       case 10:
3142         // enddeclare
3143         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3144             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3145             && (data[++index] == 'e'))
3146           return TokenNameenddeclare;
3147         index = 0;
3148         if ((data[++index] == 'n') // endforeach
3149             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3150             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3151           return TokenNameendforeach;
3152         else
3153           return TokenNameIdentifier;
3154       default:
3155         return TokenNameIdentifier;
3156       }
3157     case 'f':
3158       //for false final function
3159       switch (length) {
3160       case 3:
3161         if ((data[++index] == 'o') && (data[++index] == 'r'))
3162           return TokenNamefor;
3163         else
3164           return TokenNameIdentifier;
3165       case 5:
3166         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3167         //                && (data[++index] == 's') && (data[++index] == 'e'))
3168         //              return TokenNamefalse;
3169         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3170           return TokenNamefinal;
3171         else
3172           return TokenNameIdentifier;
3173       case 7:
3174         // foreach
3175         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3176             && (data[++index] == 'c') && (data[++index] == 'h'))
3177           return TokenNameforeach;
3178         else
3179           return TokenNameIdentifier;
3180       case 8:
3181         // function
3182         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3183             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3184           return TokenNamefunction;
3185         else
3186           return TokenNameIdentifier;
3187       default:
3188         return TokenNameIdentifier;
3189       }
3190     case 'g':
3191       //global
3192       if (length == 6) {
3193         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3194             && (data[++index] == 'l')) {
3195           return TokenNameglobal;
3196         }
3197       }
3198       return TokenNameIdentifier;
3199     case 'i':
3200       //if int isset include include_once instanceof interface implements
3201       switch (length) {
3202       case 2:
3203         if (data[++index] == 'f')
3204           return TokenNameif;
3205         else
3206           return TokenNameIdentifier;
3207       //          case 3 :
3208       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3209       //              return TokenNameint;
3210       //            else
3211       //              return TokenNameIdentifier;
3212       case 5:
3213         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3214           return TokenNameisset;
3215         else
3216           return TokenNameIdentifier;
3217       case 7:
3218         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3219             && (data[++index] == 'd') && (data[++index] == 'e'))
3220           return TokenNameinclude;
3221         else
3222           return TokenNameIdentifier;
3223       case 9:
3224         // interface
3225         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3226             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3227           return TokenNameinterface;
3228         else
3229           return TokenNameIdentifier;
3230       case 10:
3231         // instanceof
3232         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3233             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3234             && (data[++index] == 'f'))
3235           return TokenNameinstanceof;
3236         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3237             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3238             && (data[++index] == 's'))
3239           return TokenNameimplements;
3240         else
3241           return TokenNameIdentifier;
3242       case 12:
3243         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3244             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3245             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3246           return TokenNameinclude_once;
3247         else
3248           return TokenNameIdentifier;
3249       default:
3250         return TokenNameIdentifier;
3251       }
3252     case 'l':
3253       //list
3254       if (length == 4) {
3255         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3256           return TokenNamelist;
3257         }
3258       }
3259       return TokenNameIdentifier;
3260     case 'n':
3261       // new null
3262       switch (length) {
3263       case 3:
3264         if ((data[++index] == 'e') && (data[++index] == 'w'))
3265           return TokenNamenew;
3266         else
3267           return TokenNameIdentifier;
3268       //          case 4 :
3269       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3270       //                && (data[++index] == 'l'))
3271       //              return TokenNamenull;
3272       //            else
3273       //              return TokenNameIdentifier;
3274       default:
3275         return TokenNameIdentifier;
3276       }
3277     case 'o':
3278       // or old_function
3279       if (length == 2) {
3280         if (data[++index] == 'r') {
3281           return TokenNameor;
3282         }
3283       }
3284       //        if (length == 12) {
3285       //          if ((data[++index] == 'l')
3286       //            && (data[++index] == 'd')
3287       //            && (data[++index] == '_')
3288       //            && (data[++index] == 'f')
3289       //            && (data[++index] == 'u')
3290       //            && (data[++index] == 'n')
3291       //            && (data[++index] == 'c')
3292       //            && (data[++index] == 't')
3293       //            && (data[++index] == 'i')
3294       //            && (data[++index] == 'o')
3295       //            && (data[++index] == 'n')) {
3296       //            return TokenNameold_function;
3297       //          }
3298       //        }
3299       return TokenNameIdentifier;
3300     case 'p':
3301       // print public private protected
3302       switch (length) {
3303       case 5:
3304         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3305           return TokenNameprint;
3306         } else
3307           return TokenNameIdentifier;
3308       case 6:
3309         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3310             && (data[++index] == 'c')) {
3311           return TokenNamepublic;
3312         } else
3313           return TokenNameIdentifier;
3314       case 7:
3315         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3316             && (data[++index] == 't') && (data[++index] == 'e')) {
3317           return TokenNameprivate;
3318         } else
3319           return TokenNameIdentifier;
3320       case 9:
3321         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3322             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3323           return TokenNameprotected;
3324         } else
3325           return TokenNameIdentifier;
3326       }
3327       return TokenNameIdentifier;
3328     case 'r':
3329       //return require require_once
3330       if (length == 6) {
3331         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3332             && (data[++index] == 'n')) {
3333           return TokenNamereturn;
3334         }
3335       } else if (length == 7) {
3336         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3337             && (data[++index] == 'r') && (data[++index] == 'e')) {
3338           return TokenNamerequire;
3339         }
3340       } else if (length == 12) {
3341         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3342             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3343             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3344           return TokenNamerequire_once;
3345         }
3346       } else
3347         return TokenNameIdentifier;
3348     case 's':
3349       //static switch
3350       switch (length) {
3351       case 6:
3352         if (data[++index] == 't')
3353           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3354             return TokenNamestatic;
3355           } else
3356             return TokenNameIdentifier;
3357         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3358             && (data[++index] == 'h'))
3359           return TokenNameswitch;
3360         else
3361           return TokenNameIdentifier;
3362       default:
3363         return TokenNameIdentifier;
3364       }
3365     case 't':
3366       // try true throw
3367       switch (length) {
3368       case 3:
3369         if ((data[++index] == 'r') && (data[++index] == 'y'))
3370           return TokenNametry;
3371         else
3372           return TokenNameIdentifier;
3373       //          case 4 :
3374       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3375       //                && (data[++index] == 'e'))
3376       //              return TokenNametrue;
3377       //            else
3378       //              return TokenNameIdentifier;
3379       case 5:
3380         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3381           return TokenNamethrow;
3382         else
3383           return TokenNameIdentifier;
3384       default:
3385         return TokenNameIdentifier;
3386       }
3387     case 'u':
3388       //use unset
3389       switch (length) {
3390       case 3:
3391         if ((data[++index] == 's') && (data[++index] == 'e'))
3392           return TokenNameuse;
3393         else
3394           return TokenNameIdentifier;
3395       case 5:
3396         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3397           return TokenNameunset;
3398         else
3399           return TokenNameIdentifier;
3400       default:
3401         return TokenNameIdentifier;
3402       }
3403     case 'v':
3404       //var
3405       switch (length) {
3406       case 3:
3407         if ((data[++index] == 'a') && (data[++index] == 'r'))
3408           return TokenNamevar;
3409         else
3410           return TokenNameIdentifier;
3411       default:
3412         return TokenNameIdentifier;
3413       }
3414     case 'w':
3415       //while
3416       switch (length) {
3417       case 5:
3418         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3419           return TokenNamewhile;
3420         else
3421           return TokenNameIdentifier;
3422       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3423       // (data[++index]=='e') && (data[++index]=='f')&&
3424       // (data[++index]=='p'))
3425       //return TokenNamewidefp ;
3426       //else
3427       //return TokenNameIdentifier;
3428       default:
3429         return TokenNameIdentifier;
3430       }
3431     case 'x':
3432       //xor
3433       switch (length) {
3434       case 3:
3435         if ((data[++index] == 'o') && (data[++index] == 'r'))
3436           return TokenNamexor;
3437         else
3438           return TokenNameIdentifier;
3439       default:
3440         return TokenNameIdentifier;
3441       }
3442     default:
3443       return TokenNameIdentifier;
3444     }
3445   }
3446
3447   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3448     //when entering this method the currentCharacter is the firt
3449     //digit of the number , i.e. it may be preceeded by a . when
3450     //dotPrefix is true
3451     boolean floating = dotPrefix;
3452     if ((!dotPrefix) && (currentCharacter == '0')) {
3453       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3454         //force the first char of the hexa number do exist...
3455         // consume next character
3456         unicodeAsBackSlash = false;
3457         currentCharacter = source[currentPosition++];
3458         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3459         //          && (source[currentPosition] == 'u')) {
3460         //          getNextUnicodeChar();
3461         //        } else {
3462         //          if (withoutUnicodePtr != 0) {
3463         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3464         //          }
3465         //        }
3466         if (Character.digit(currentCharacter, 16) == -1)
3467           throw new InvalidInputException(INVALID_HEXA);
3468         //---end forcing--
3469         while (getNextCharAsDigit(16)) {
3470         }
3471         ;
3472         //        if (getNextChar('l', 'L') >= 0)
3473         //          return TokenNameLongLiteral;
3474         //        else
3475         return TokenNameIntegerLiteral;
3476       }
3477       //there is x or X in the number
3478       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3479       // 00078.0 is true !!!!! crazy language
3480       if (getNextCharAsDigit()) {
3481         //-------------potential octal-----------------
3482         while (getNextCharAsDigit()) {
3483         }
3484         ;
3485         //        if (getNextChar('l', 'L') >= 0) {
3486         //          return TokenNameLongLiteral;
3487         //        }
3488         //
3489         //        if (getNextChar('f', 'F') >= 0) {
3490         //          return TokenNameFloatingPointLiteral;
3491         //        }
3492         if (getNextChar('d', 'D') >= 0) {
3493           return TokenNameDoubleLiteral;
3494         } else { //make the distinction between octal and float ....
3495           if (getNextChar('.')) { //bingo ! ....
3496             while (getNextCharAsDigit()) {
3497             }
3498             ;
3499             if (getNextChar('e', 'E') >= 0) {
3500               // consume next character
3501               unicodeAsBackSlash = false;
3502               currentCharacter = source[currentPosition++];
3503               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3504               //                && (source[currentPosition] == 'u')) {
3505               //                getNextUnicodeChar();
3506               //              } else {
3507               //                if (withoutUnicodePtr != 0) {
3508               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3509               //                }
3510               //              }
3511               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3512                 // consume next character
3513                 unicodeAsBackSlash = false;
3514                 currentCharacter = source[currentPosition++];
3515                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3516                 //                  && (source[currentPosition] == 'u')) {
3517                 //                  getNextUnicodeChar();
3518                 //                } else {
3519                 //                  if (withoutUnicodePtr != 0) {
3520                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3521                 //                      currentCharacter;
3522                 //                  }
3523                 //                }
3524               }
3525               if (!Character.isDigit(currentCharacter))
3526                 throw new InvalidInputException(INVALID_FLOAT);
3527               while (getNextCharAsDigit()) {
3528               }
3529               ;
3530             }
3531             //            if (getNextChar('f', 'F') >= 0)
3532             //              return TokenNameFloatingPointLiteral;
3533             getNextChar('d', 'D'); //jump over potential d or D
3534             return TokenNameDoubleLiteral;
3535           } else {
3536             return TokenNameIntegerLiteral;
3537           }
3538         }
3539       } else {
3540         /* carry on */
3541       }
3542     }
3543     while (getNextCharAsDigit()) {
3544     }
3545     ;
3546     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3547     //      return TokenNameLongLiteral;
3548     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3549       while (getNextCharAsDigit()) {
3550       }
3551       ;
3552       floating = true;
3553     }
3554     //if floating is true both exponant and suffix may be optional
3555     if (getNextChar('e', 'E') >= 0) {
3556       floating = true;
3557       // consume next character
3558       unicodeAsBackSlash = false;
3559       currentCharacter = source[currentPosition++];
3560       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3561       //        && (source[currentPosition] == 'u')) {
3562       //        getNextUnicodeChar();
3563       //      } else {
3564       //        if (withoutUnicodePtr != 0) {
3565       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3566       //        }
3567       //      }
3568       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3569         // next
3570         // character
3571         unicodeAsBackSlash = false;
3572         currentCharacter = source[currentPosition++];
3573         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3574         //          && (source[currentPosition] == 'u')) {
3575         //          getNextUnicodeChar();
3576         //        } else {
3577         //          if (withoutUnicodePtr != 0) {
3578         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3579         //          }
3580         //        }
3581       }
3582       if (!Character.isDigit(currentCharacter))
3583         throw new InvalidInputException(INVALID_FLOAT);
3584       while (getNextCharAsDigit()) {
3585       }
3586       ;
3587     }
3588     if (getNextChar('d', 'D') >= 0)
3589       return TokenNameDoubleLiteral;
3590     //    if (getNextChar('f', 'F') >= 0)
3591     //      return TokenNameFloatingPointLiteral;
3592     //the long flag has been tested before
3593     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3594   }
3595
3596   /**
3597    * Search the line number corresponding to a specific position
3598    *
3599    */
3600   public final int getLineNumber(int position) {
3601     if (lineEnds == null)
3602       return 1;
3603     int length = linePtr + 1;
3604     if (length == 0)
3605       return 1;
3606     int g = 0, d = length - 1;
3607     int m = 0;
3608     while (g <= d) {
3609       m = (g + d) / 2;
3610       if (position < lineEnds[m]) {
3611         d = m - 1;
3612       } else if (position > lineEnds[m]) {
3613         g = m + 1;
3614       } else {
3615         return m + 1;
3616       }
3617     }
3618     if (position < lineEnds[m]) {
3619       return m + 1;
3620     }
3621     return m + 2;
3622   }
3623
3624   public void setPHPMode(boolean mode) {
3625     phpMode = mode;
3626   }
3627
3628   public final void setSource(char[] source) {
3629     //the source-buffer is set to sourceString
3630     if (source == null) {
3631       this.source = new char[0];
3632     } else {
3633       this.source = source;
3634     }
3635     startPosition = -1;
3636     initialPosition = currentPosition = 0;
3637     containsAssertKeyword = false;
3638     withoutUnicodeBuffer = new char[this.source.length];
3639     encapsedStringStack = new Stack();
3640   }
3641
3642   public String toString() {
3643     if (startPosition == source.length)
3644       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3645     if (currentPosition > source.length)
3646       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3647     char front[] = new char[startPosition];
3648     System.arraycopy(source, 0, front, 0, startPosition);
3649     int middleLength = (currentPosition - 1) - startPosition + 1;
3650     char middle[];
3651     if (middleLength > -1) {
3652       middle = new char[middleLength];
3653       System.arraycopy(source, startPosition, middle, 0, middleLength);
3654     } else {
3655       middle = new char[0];
3656     }
3657     char end[] = new char[source.length - (currentPosition - 1)];
3658     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3659     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3660         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3661         + new String(end);
3662   }
3663
3664   public final String toStringAction(int act) {
3665     switch (act) {
3666     case TokenNameERROR:
3667       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3668     // //$NON-NLS-1$
3669     case TokenNameINLINE_HTML:
3670       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3671     case TokenNameIdentifier:
3672       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3673     case TokenNameVariable:
3674       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3675     case TokenNameabstract:
3676       return "abstract"; //$NON-NLS-1$
3677     case TokenNameand:
3678       return "AND"; //$NON-NLS-1$
3679     case TokenNamearray:
3680       return "array"; //$NON-NLS-1$
3681     case TokenNameas:
3682       return "as"; //$NON-NLS-1$
3683     case TokenNamebreak:
3684       return "break"; //$NON-NLS-1$
3685     case TokenNamecase:
3686       return "case"; //$NON-NLS-1$
3687     case TokenNameclass:
3688       return "class"; //$NON-NLS-1$
3689     case TokenNamecatch:
3690       return "catch"; //$NON-NLS-1$
3691     case TokenNameclone:
3692       //$NON-NLS-1$
3693       return "clone";
3694     case TokenNameconst:
3695       //$NON-NLS-1$
3696       return "const";
3697     case TokenNamecontinue:
3698       return "continue"; //$NON-NLS-1$
3699     case TokenNamedefault:
3700       return "default"; //$NON-NLS-1$
3701     //      case TokenNamedefine :
3702     //        return "define"; //$NON-NLS-1$
3703     case TokenNamedo:
3704       return "do"; //$NON-NLS-1$
3705     case TokenNameecho:
3706       return "echo"; //$NON-NLS-1$
3707     case TokenNameelse:
3708       return "else"; //$NON-NLS-1$
3709     case TokenNameelseif:
3710       return "elseif"; //$NON-NLS-1$
3711     case TokenNameendfor:
3712       return "endfor"; //$NON-NLS-1$
3713     case TokenNameendforeach:
3714       return "endforeach"; //$NON-NLS-1$
3715     case TokenNameendif:
3716       return "endif"; //$NON-NLS-1$
3717     case TokenNameendswitch:
3718       return "endswitch"; //$NON-NLS-1$
3719     case TokenNameendwhile:
3720       return "endwhile"; //$NON-NLS-1$
3721     case TokenNameexit:
3722       return "exit";
3723     case TokenNameextends:
3724       return "extends"; //$NON-NLS-1$
3725     //      case TokenNamefalse :
3726     //        return "false"; //$NON-NLS-1$
3727     case TokenNamefinal:
3728       return "final"; //$NON-NLS-1$
3729     case TokenNamefor:
3730       return "for"; //$NON-NLS-1$
3731     case TokenNameforeach:
3732       return "foreach"; //$NON-NLS-1$
3733     case TokenNamefunction:
3734       return "function"; //$NON-NLS-1$
3735     case TokenNameglobal:
3736       return "global"; //$NON-NLS-1$
3737     case TokenNameif:
3738       return "if"; //$NON-NLS-1$
3739     case TokenNameimplements:
3740       return "implements"; //$NON-NLS-1$
3741     case TokenNameinclude:
3742       return "include"; //$NON-NLS-1$
3743     case TokenNameinclude_once:
3744       return "include_once"; //$NON-NLS-1$
3745     case TokenNameinstanceof:
3746       return "instanceof"; //$NON-NLS-1$
3747     case TokenNameinterface:
3748       return "interface"; //$NON-NLS-1$
3749     case TokenNameisset:
3750       return "isset"; //$NON-NLS-1$
3751     case TokenNamelist:
3752       return "list"; //$NON-NLS-1$
3753     case TokenNamenew:
3754       return "new"; //$NON-NLS-1$
3755     //      case TokenNamenull :
3756     //        return "null"; //$NON-NLS-1$
3757     case TokenNameor:
3758       return "OR"; //$NON-NLS-1$
3759     case TokenNameprint:
3760       return "print"; //$NON-NLS-1$
3761     case TokenNameprivate:
3762       return "private"; //$NON-NLS-1$
3763     case TokenNameprotected:
3764       return "protected"; //$NON-NLS-1$
3765     case TokenNamepublic:
3766       return "public"; //$NON-NLS-1$
3767     case TokenNamerequire:
3768       return "require"; //$NON-NLS-1$
3769     case TokenNamerequire_once:
3770       return "require_once"; //$NON-NLS-1$
3771     case TokenNamereturn:
3772       return "return"; //$NON-NLS-1$
3773     case TokenNamestatic:
3774       return "static"; //$NON-NLS-1$
3775     case TokenNameswitch:
3776       return "switch"; //$NON-NLS-1$
3777     //      case TokenNametrue :
3778     //        return "true"; //$NON-NLS-1$
3779     case TokenNameunset:
3780       return "unset"; //$NON-NLS-1$
3781     case TokenNamevar:
3782       return "var"; //$NON-NLS-1$
3783     case TokenNamewhile:
3784       return "while"; //$NON-NLS-1$
3785     case TokenNamexor:
3786       return "XOR"; //$NON-NLS-1$
3787     //      case TokenNamethis :
3788     //        return "$this"; //$NON-NLS-1$
3789     case TokenNameIntegerLiteral:
3790       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3791     case TokenNameDoubleLiteral:
3792       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3793     case TokenNameStringDoubleQuote:
3794       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3795     case TokenNameStringSingleQuote:
3796       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3797     case TokenNameStringInterpolated:
3798       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3799     case TokenNameEncapsedString0:
3800       return "`"; //$NON-NLS-1$
3801     case TokenNameEncapsedString1:
3802       return "\'"; //$NON-NLS-1$
3803     case TokenNameEncapsedString2:
3804       return "\""; //$NON-NLS-1$
3805     case TokenNameSTRING:
3806       return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3807     case TokenNameHEREDOC:
3808       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3809     case TokenNamePLUS_PLUS:
3810       return "++"; //$NON-NLS-1$
3811     case TokenNameMINUS_MINUS:
3812       return "--"; //$NON-NLS-1$
3813     case TokenNameEQUAL_EQUAL:
3814       return "=="; //$NON-NLS-1$
3815     case TokenNameEQUAL_EQUAL_EQUAL:
3816       return "==="; //$NON-NLS-1$
3817     case TokenNameEQUAL_GREATER:
3818       return "=>"; //$NON-NLS-1$
3819     case TokenNameLESS_EQUAL:
3820       return "<="; //$NON-NLS-1$
3821     case TokenNameGREATER_EQUAL:
3822       return ">="; //$NON-NLS-1$
3823     case TokenNameNOT_EQUAL:
3824       return "!="; //$NON-NLS-1$
3825     case TokenNameNOT_EQUAL_EQUAL:
3826       return "!=="; //$NON-NLS-1$
3827     case TokenNameLEFT_SHIFT:
3828       return "<<"; //$NON-NLS-1$
3829     case TokenNameRIGHT_SHIFT:
3830       return ">>"; //$NON-NLS-1$
3831     case TokenNamePLUS_EQUAL:
3832       return "+="; //$NON-NLS-1$
3833     case TokenNameMINUS_EQUAL:
3834       return "-="; //$NON-NLS-1$
3835     case TokenNameMULTIPLY_EQUAL:
3836       return "*="; //$NON-NLS-1$
3837     case TokenNameDIVIDE_EQUAL:
3838       return "/="; //$NON-NLS-1$
3839     case TokenNameAND_EQUAL:
3840       return "&="; //$NON-NLS-1$
3841     case TokenNameOR_EQUAL:
3842       return "|="; //$NON-NLS-1$
3843     case TokenNameXOR_EQUAL:
3844       return "^="; //$NON-NLS-1$
3845     case TokenNameREMAINDER_EQUAL:
3846       return "%="; //$NON-NLS-1$
3847     case TokenNameDOT_EQUAL:
3848       return ".="; //$NON-NLS-1$
3849     case TokenNameLEFT_SHIFT_EQUAL:
3850       return "<<="; //$NON-NLS-1$
3851     case TokenNameRIGHT_SHIFT_EQUAL:
3852       return ">>="; //$NON-NLS-1$
3853     case TokenNameOR_OR:
3854       return "||"; //$NON-NLS-1$
3855     case TokenNameAND_AND:
3856       return "&&"; //$NON-NLS-1$
3857     case TokenNamePLUS:
3858       return "+"; //$NON-NLS-1$
3859     case TokenNameMINUS:
3860       return "-"; //$NON-NLS-1$
3861     case TokenNameMINUS_GREATER:
3862       return "->";
3863     case TokenNameNOT:
3864       return "!"; //$NON-NLS-1$
3865     case TokenNameREMAINDER:
3866       return "%"; //$NON-NLS-1$
3867     case TokenNameXOR:
3868       return "^"; //$NON-NLS-1$
3869     case TokenNameAND:
3870       return "&"; //$NON-NLS-1$
3871     case TokenNameMULTIPLY:
3872       return "*"; //$NON-NLS-1$
3873     case TokenNameOR:
3874       return "|"; //$NON-NLS-1$
3875     case TokenNameTWIDDLE:
3876       return "~"; //$NON-NLS-1$
3877     case TokenNameTWIDDLE_EQUAL:
3878       return "~="; //$NON-NLS-1$
3879     case TokenNameDIVIDE:
3880       return "/"; //$NON-NLS-1$
3881     case TokenNameGREATER:
3882       return ">"; //$NON-NLS-1$
3883     case TokenNameLESS:
3884       return "<"; //$NON-NLS-1$
3885     case TokenNameLPAREN:
3886       return "("; //$NON-NLS-1$
3887     case TokenNameRPAREN:
3888       return ")"; //$NON-NLS-1$
3889     case TokenNameLBRACE:
3890       return "{"; //$NON-NLS-1$
3891     case TokenNameRBRACE:
3892       return "}"; //$NON-NLS-1$
3893     case TokenNameLBRACKET:
3894       return "["; //$NON-NLS-1$
3895     case TokenNameRBRACKET:
3896       return "]"; //$NON-NLS-1$
3897     case TokenNameSEMICOLON:
3898       return ";"; //$NON-NLS-1$
3899     case TokenNameQUESTION:
3900       return "?"; //$NON-NLS-1$
3901     case TokenNameCOLON:
3902       return ":"; //$NON-NLS-1$
3903     case TokenNameCOMMA:
3904       return ","; //$NON-NLS-1$
3905     case TokenNameDOT:
3906       return "."; //$NON-NLS-1$
3907     case TokenNameEQUAL:
3908       return "="; //$NON-NLS-1$
3909     case TokenNameAT:
3910       return "@";
3911     case TokenNameDOLLAR:
3912       return "$";
3913     case TokenNameDOLLAR_LBRACE:
3914       return "${";
3915     case TokenNameLBRACE_DOLLAR:
3916       return "{$";
3917     case TokenNameEOF:
3918       return "EOF"; //$NON-NLS-1$
3919     case TokenNameWHITESPACE:
3920       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3921     case TokenNameCOMMENT_LINE:
3922       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3923     case TokenNameCOMMENT_BLOCK:
3924       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3925     case TokenNameCOMMENT_PHPDOC:
3926       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3927     //      case TokenNameHTML :
3928     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3929     // //$NON-NLS-1$
3930     case TokenNameFILE:
3931       return "__FILE__"; //$NON-NLS-1$
3932     case TokenNameLINE:
3933       return "__LINE__"; //$NON-NLS-1$
3934     case TokenNameCLASS_C:
3935       return "__CLASS__"; //$NON-NLS-1$
3936     case TokenNameMETHOD_C:
3937       return "__METHOD__"; //$NON-NLS-1$
3938     case TokenNameFUNC_C:
3939       return "__FUNCTION__"; //$NON-NLS-1
3940     case TokenNameboolCAST:
3941       return "( bool )"; //$NON-NLS-1$
3942     case TokenNameintCAST:
3943       return "( int )"; //$NON-NLS-1$
3944     case TokenNamedoubleCAST:
3945       return "( double )"; //$NON-NLS-1$
3946     case TokenNameobjectCAST:
3947       return "( object )"; //$NON-NLS-1$
3948     case TokenNamestringCAST:
3949       return "( string )"; //$NON-NLS-1$
3950     default:
3951       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3952     }
3953   }
3954
3955   public Scanner() {
3956     this(false, false);
3957   }
3958
3959   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3960     this(tokenizeComments, tokenizeWhiteSpace, false);
3961   }
3962
3963   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3964     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3965   }
3966
3967   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3968       boolean assertMode) {
3969     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null);
3970   }
3971
3972   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3973       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities) {
3974     this.eofPosition = Integer.MAX_VALUE;
3975     this.tokenizeComments = tokenizeComments;
3976     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3977     this.tokenizeStrings = tokenizeStrings;
3978     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3979     this.assertMode = assertMode;
3980     this.encapsedStringStack = null;
3981     this.taskTags = taskTags;
3982     this.taskPriorities = taskPriorities;
3983   }
3984
3985   private void checkNonExternalizeString() throws InvalidInputException {
3986     if (currentLine == null)
3987       return;
3988     parseTags(currentLine);
3989   }
3990
3991   private void parseTags(NLSLine line) throws InvalidInputException {
3992     String s = new String(getCurrentTokenSource());
3993     int pos = s.indexOf(TAG_PREFIX);
3994     int lineLength = line.size();
3995     while (pos != -1) {
3996       int start = pos + TAG_PREFIX_LENGTH;
3997       int end = s.indexOf(TAG_POSTFIX, start);
3998       String index = s.substring(start, end);
3999       int i = 0;
4000       try {
4001         i = Integer.parseInt(index) - 1;
4002         // Tags are one based not zero based.
4003       } catch (NumberFormatException e) {
4004         i = -1; // we don't want to consider this as a valid NLS tag
4005       }
4006       if (line.exists(i)) {
4007         line.set(i, null);
4008       }
4009       pos = s.indexOf(TAG_PREFIX, start);
4010     }
4011     this.nonNLSStrings = new StringLiteral[lineLength];
4012     int nonNLSCounter = 0;
4013     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4014       StringLiteral literal = (StringLiteral) iterator.next();
4015       if (literal != null) {
4016         this.nonNLSStrings[nonNLSCounter++] = literal;
4017       }
4018     }
4019     if (nonNLSCounter == 0) {
4020       this.nonNLSStrings = null;
4021       currentLine = null;
4022       return;
4023     }
4024     this.wasNonExternalizedStringLiteral = true;
4025     if (nonNLSCounter != lineLength) {
4026       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4027     }
4028     currentLine = null;
4029   }
4030
4031   public final void scanEscapeCharacter() throws InvalidInputException {
4032     // the string with "\\u" is a legal string of two chars \ and u
4033     //thus we use a direct access to the source (for regular cases).
4034     if (unicodeAsBackSlash) {
4035       // consume next character
4036       unicodeAsBackSlash = false;
4037       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
4038       // (source[currentPosition] == 'u')) {
4039       //                                getNextUnicodeChar();
4040       //                        } else {
4041       if (withoutUnicodePtr != 0) {
4042         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4043         //                              }
4044       }
4045     } else
4046       currentCharacter = source[currentPosition++];
4047     switch (currentCharacter) {
4048     case 'b':
4049       currentCharacter = '\b';
4050       break;
4051     case 't':
4052       currentCharacter = '\t';
4053       break;
4054     case 'n':
4055       currentCharacter = '\n';
4056       break;
4057     case 'f':
4058       currentCharacter = '\f';
4059       break;
4060     case 'r':
4061       currentCharacter = '\r';
4062       break;
4063     case '\"':
4064       currentCharacter = '\"';
4065       break;
4066     case '\'':
4067       currentCharacter = '\'';
4068       break;
4069     case '\\':
4070       currentCharacter = '\\';
4071       break;
4072     default:
4073       // -----------octal escape--------------
4074       // OctalDigit
4075       // OctalDigit OctalDigit
4076       // ZeroToThree OctalDigit OctalDigit
4077       int number = Character.getNumericValue(currentCharacter);
4078       if (number >= 0 && number <= 7) {
4079         boolean zeroToThreeNot = number > 3;
4080         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4081           int digit = Character.getNumericValue(currentCharacter);
4082           if (digit >= 0 && digit <= 7) {
4083             number = (number * 8) + digit;
4084             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4085               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4086                 // Digit --> ignore last character
4087                 currentPosition--;
4088               } else {
4089                 digit = Character.getNumericValue(currentCharacter);
4090                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4091                   // OctalDigit OctalDigit
4092                   number = (number * 8) + digit;
4093                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4094                   // --> ignore last character
4095                   currentPosition--;
4096                 }
4097               }
4098             } else { // has read \OctalDigit NonDigit--> ignore last
4099               // character
4100               currentPosition--;
4101             }
4102           } else { // has read \OctalDigit NonOctalDigit--> ignore last
4103             // character
4104             currentPosition--;
4105           }
4106         } else { // has read \OctalDigit --> ignore last character
4107           currentPosition--;
4108         }
4109         if (number > 255)
4110           throw new InvalidInputException(INVALID_ESCAPE);
4111         currentCharacter = (char) number;
4112       } else
4113         throw new InvalidInputException(INVALID_ESCAPE);
4114     }
4115   }
4116
4117   // chech presence of task: tags
4118   public void checkTaskTag(int commentStart, int commentEnd) {
4119     // only look for newer task: tags
4120     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4121       return;
4122     }
4123     int foundTaskIndex = this.foundTaskCount;
4124     nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4125       char[] tag = null;
4126       char[] priority = null;
4127       // check for tag occurrence
4128       nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4129         tag = this.taskTags[itag];
4130         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4131         int tagLength = tag.length;
4132         for (int t = 0; t < tagLength; t++) {
4133           if (this.source[i + t] != tag[t])
4134             continue nextTag;
4135         }
4136         if (this.foundTaskTags == null) {
4137           this.foundTaskTags = new char[5][];
4138           this.foundTaskMessages = new char[5][];
4139           this.foundTaskPriorities = new char[5][];
4140           this.foundTaskPositions = new int[5][];
4141         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4142           System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4143           System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4144               this.foundTaskCount);
4145           System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4146               this.foundTaskCount);
4147           System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4148               this.foundTaskCount);
4149         }
4150         this.foundTaskTags[this.foundTaskCount] = tag;
4151         this.foundTaskPriorities[this.foundTaskCount] = priority;
4152         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4153         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4154         this.foundTaskCount++;
4155         i += tagLength - 1; // will be incremented when looping
4156       }
4157     }
4158     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4159       // retrieve message start and end positions
4160       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4161       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4162       // at most beginning of next task
4163       if (max_value < msgStart)
4164         max_value = msgStart; // would only occur if tag is before EOF.
4165       int end = -1;
4166       char c;
4167       for (int j = msgStart; j < max_value; j++) {
4168         if ((c = this.source[j]) == '\n' || c == '\r') {
4169           end = j - 1;
4170           break;
4171         }
4172       }
4173       if (end == -1) {
4174         for (int j = max_value; j > msgStart; j--) {
4175           if ((c = this.source[j]) == '*') {
4176             end = j - 1;
4177             break;
4178           }
4179         }
4180         if (end == -1)
4181           end = max_value;
4182       }
4183       if (msgStart == end)
4184         continue; // empty
4185       // trim the message
4186       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4187         end--;
4188       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4189         msgStart++;
4190       // update the end position of the task
4191       this.foundTaskPositions[i][1] = end;
4192       // get the message source
4193       final int messageLength = end - msgStart + 1;
4194       char[] message = new char[messageLength];
4195       System.arraycopy(source, msgStart, message, 0, messageLength);
4196       this.foundTaskMessages[i] = message;
4197     }
4198   }
4199 }