net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.CharOperation;
  18 import net.sourceforge.phpdt.core.compiler.IScanner;
  19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  22
  23 public class Scanner implements IScanner, ITerminalSymbols {
  24
  25   /* APIs are
  26    - getNextToken() which returns the current type of the token
  27      (this value is not memorized by the scanner)
  28    - getCurrentTokenSource() which provides the token "REAL" source
  29      (i.e. all unicode escapes have been transformed into the correct char)
  30    - sourceStart gives the position into the stream
  31    - currentPosition-1 gives the sourceEnd position into the stream
  32   */
  33
  34   // 1.4 feature
  35   private boolean assertMode;
  36   public boolean useAssertAsAnIndentifier = false;
  37   //flag indicating if processed source contains occurrences of keyword assert
  38   public boolean containsAssertKeyword = false;
  39
  40   public boolean recordLineSeparator;
  41   public boolean phpMode = false;
  42
  43   public char currentCharacter;
  44   public int startPosition;
  45   public int currentPosition;
  46   public int initialPosition, eofPosition;
  47   // after this position, EOF tokens are generated instead of real tokens from the source
  48
  49   public boolean tokenizeComments;
  50   public boolean tokenizeWhiteSpace;
  51
  52   //source should be viewed as a window (aka a part)
  53   //of an entire very large stream
  54   public char source[];
  55
  56   //unicode support
  57   public char[] withoutUnicodeBuffer;
  58   public int withoutUnicodePtr;
  59   //when == 0 ==> no unicode in the current token
  60   public boolean unicodeAsBackSlash = false;
  61
  62   public boolean scanningFloatLiteral = false;
  63
  64   //support for /** comments
  65   //public char[][] comments = new char[10][];
  66   public int[] commentStops = new int[10];
  67   public int[] commentStarts = new int[10];
  68   public int commentPtr = -1; // no comment test with commentPtr value -1
  69
  70   //diet parsing support - jump over some method body when requested
  71   public boolean diet = false;
  72
  73   //support for the  poor-line-debuggers ....
  74   //remember the position of the cr/lf
  75   public int[] lineEnds = new int[250];
  76   public int linePtr = -1;
  77   public boolean wasAcr = false;
  78
  79   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  80
  81   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  82   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  83   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  84   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  85   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  86   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  87   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  88
  89   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  90   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  91   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  92   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  93
  94   //----------------optimized identifier management------------------
  95   static final char[] charArray_a = new char[] { 'a' },
  96     charArray_b = new char[] { 'b' },
  97     charArray_c = new char[] { 'c' },
  98     charArray_d = new char[] { 'd' },
  99     charArray_e = new char[] { 'e' },
 100     charArray_f = new char[] { 'f' },
 101     charArray_g = new char[] { 'g' },
 102     charArray_h = new char[] { 'h' },
 103     charArray_i = new char[] { 'i' },
 104     charArray_j = new char[] { 'j' },
 105     charArray_k = new char[] { 'k' },
 106     charArray_l = new char[] { 'l' },
 107     charArray_m = new char[] { 'm' },
 108     charArray_n = new char[] { 'n' },
 109     charArray_o = new char[] { 'o' },
 110     charArray_p = new char[] { 'p' },
 111     charArray_q = new char[] { 'q' },
 112     charArray_r = new char[] { 'r' },
 113     charArray_s = new char[] { 's' },
 114     charArray_t = new char[] { 't' },
 115     charArray_u = new char[] { 'u' },
 116     charArray_v = new char[] { 'v' },
 117     charArray_w = new char[] { 'w' },
 118     charArray_x = new char[] { 'x' },
 119     charArray_y = new char[] { 'y' },
 120     charArray_z = new char[] { 'z' };
 121
 122   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 123   static final int TableSize = 30, InternalTableSize = 6;
 124   //30*6 = 180 entries
 125   public static final int OptimizedLength = 6;
 126   public /*static*/
 127   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 128   // support for detecting non-externalized string literals
 129   int currentLineNr = -1;
 130   int previousLineNr = -1;
 131   NLSLine currentLine = null;
 132   List lines = new ArrayList();
 133   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 134   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 135   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 136   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 137   public StringLiteral[] nonNLSStrings = null;
 138   public boolean checkNonExternalizedStringLiterals = true;
 139   public boolean wasNonExternalizedStringLiteral = false;
 140
 141   /*static*/ {
 142     for (int i = 0; i < 6; i++) {
 143       for (int j = 0; j < TableSize; j++) {
 144         for (int k = 0; k < InternalTableSize; k++) {
 145           charArray_length[i][j][k] = initCharArray;
 146         }
 147       }
 148     }
 149   }
 150   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 151
 152   public static final int RoundBracket = 0;
 153   public static final int SquareBracket = 1;
 154   public static final int CurlyBracket = 2;
 155   public static final int BracketKinds = 3;
 156
 157         // task tag support
 158         public char[][] foundTaskTags = null;
 159         public char[][] foundTaskMessages;
 160         public char[][] foundTaskPriorities = null;
 161         public int[][] foundTaskPositions;
 162         public int foundTaskCount = 0;
 163         public char[][] taskTags = null;
 164         public char[][] taskPriorities = null;
 165
 166   public static final boolean DEBUG = false;
 167
 168   public Scanner() {
 169     this(false, false);
 170   }
 171   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 172     this(tokenizeComments, tokenizeWhiteSpace, false);
 173   }
 174
 175   /**
 176    * Determines if the specified character is
 177    * permissible as the first character in a PHP identifier
 178    */
 179   public static boolean isPHPIdentifierStart(char ch) {
 180     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 181   }
 182
 183   /**
 184    * Determines if the specified character may be part of a PHP identifier as
 185    * other than the first character
 186    */
 187   public static boolean isPHPIdentifierPart(char ch) {
 188     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 189   }
 190
 191   public final boolean atEnd() {
 192     // This code is not relevant if source is
 193     // Only a part of the real stream input
 194
 195     return source.length == currentPosition;
 196   }
 197   public char[] getCurrentIdentifierSource() {
 198     //return the token REAL source (aka unicodes are precomputed)
 199
 200     char[] result;
 201     //    if (withoutUnicodePtr != 0)
 202     //      //0 is used as a fast test flag so the real first char is in position 1
 203     //      System.arraycopy(
 204     //        withoutUnicodeBuffer,
 205     //        1,
 206     //        result = new char[withoutUnicodePtr],
 207     //        0,
 208     //        withoutUnicodePtr);
 209     //    else {
 210     int length = currentPosition - startPosition;
 211     switch (length) { // see OptimizedLength
 212       case 1 :
 213         return optimizedCurrentTokenSource1();
 214       case 2 :
 215         return optimizedCurrentTokenSource2();
 216       case 3 :
 217         return optimizedCurrentTokenSource3();
 218       case 4 :
 219         return optimizedCurrentTokenSource4();
 220       case 5 :
 221         return optimizedCurrentTokenSource5();
 222       case 6 :
 223         return optimizedCurrentTokenSource6();
 224     }
 225     //no optimization
 226     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 227     //   }
 228     return result;
 229   }
 230   public int getCurrentTokenEndPosition() {
 231     return this.currentPosition - 1;
 232   }
 233
 234   public final char[] getCurrentTokenSource() {
 235     // Return the token REAL source (aka unicodes are precomputed)
 236
 237     char[] result;
 238     //    if (withoutUnicodePtr != 0)
 239     //      // 0 is used as a fast test flag so the real first char is in position 1
 240     //      System.arraycopy(
 241     //        withoutUnicodeBuffer,
 242     //        1,
 243     //        result = new char[withoutUnicodePtr],
 244     //        0,
 245     //        withoutUnicodePtr);
 246     //    else {
 247     int length;
 248     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 249     //    }
 250     return result;
 251   }
 252
 253   public final char[] getCurrentTokenSource(int startPos) {
 254     // Return the token REAL source (aka unicodes are precomputed)
 255
 256     char[] result;
 257     //    if (withoutUnicodePtr != 0)
 258     //      // 0 is used as a fast test flag so the real first char is in position 1
 259     //      System.arraycopy(
 260     //        withoutUnicodeBuffer,
 261     //        1,
 262     //        result = new char[withoutUnicodePtr],
 263     //        0,
 264     //        withoutUnicodePtr);
 265     //    else {
 266     int length;
 267     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 268     //  }
 269     return result;
 270   }
 271
 272   public final char[] getCurrentTokenSourceString() {
 273     //return the token REAL source (aka unicodes are precomputed).
 274     //REMOVE the two " that are at the beginning and the end.
 275
 276     char[] result;
 277     if (withoutUnicodePtr != 0)
 278       //0 is used as a fast test flag so the real first char is in position 1
 279       System.arraycopy(withoutUnicodeBuffer, 2,
 280       //2 is 1 (real start) + 1 (to jump over the ")
 281       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 282     else {
 283       int length;
 284       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 285     }
 286     return result;
 287   }
 288   public int getCurrentTokenStartPosition() {
 289     return this.startPosition;
 290   }
 291
 292   public final char[] getCurrentStringLiteralSource() {
 293     // Return the token REAL source (aka unicodes are precomputed)
 294
 295     char[] result;
 296
 297     int length;
 298     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 299     //    }
 300     return result;
 301   }
 302
 303   /*
 304    * Search the source position corresponding to the end of a given line number
 305    *
 306    * Line numbers are 1-based, and relative to the scanner initialPosition.
 307    * Character positions are 0-based.
 308    *
 309    * In case the given line number is inconsistent, answers -1.
 310    */
 311   public final int getLineEnd(int lineNumber) {
 312
 313     if (lineEnds == null)
 314       return -1;
 315     if (lineNumber >= lineEnds.length)
 316       return -1;
 317     if (lineNumber <= 0)
 318       return -1;
 319
 320     if (lineNumber == lineEnds.length - 1)
 321       return eofPosition;
 322     return lineEnds[lineNumber - 1];
 323     // next line start one character behind the lineEnd of the previous line
 324   }
 325   /**
 326    * Search the source position corresponding to the beginning of a given line number
 327    *
 328    * Line numbers are 1-based, and relative to the scanner initialPosition.
 329    * Character positions are 0-based.
 330    *
 331    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 332    *
 333    * In case the given line number is inconsistent, answers -1.
 334    */
 335   public final int getLineStart(int lineNumber) {
 336
 337     if (lineEnds == null)
 338       return -1;
 339     if (lineNumber >= lineEnds.length)
 340       return -1;
 341     if (lineNumber <= 0)
 342       return -1;
 343
 344     if (lineNumber == 1)
 345       return initialPosition;
 346     return lineEnds[lineNumber - 2] + 1;
 347     // next line start one character behind the lineEnd of the previous line
 348   }
 349   public final boolean getNextChar(char testedChar) {
 350     //BOOLEAN
 351     //handle the case of unicode.
 352     //when a unicode appears then we must use a buffer that holds char internal values
 353     //At the end of this method currentCharacter holds the new visited char
 354     //and currentPosition points right next after it
 355     //Both previous lines are true if the currentCharacter is == to the testedChar
 356     //On false, no side effect has occured.
 357
 358     //ALL getNextChar.... ARE OPTIMIZED COPIES
 359
 360     int temp = currentPosition;
 361     try {
 362       currentCharacter = source[currentPosition++];
 363       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 364       //        && (source[currentPosition] == 'u')) {
 365       //        //-------------unicode traitement ------------
 366       //        int c1, c2, c3, c4;
 367       //        int unicodeSize = 6;
 368       //        currentPosition++;
 369       //        while (source[currentPosition] == 'u') {
 370       //          currentPosition++;
 371       //          unicodeSize++;
 372       //        }
 373       //
 374       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 375       //          || c1 < 0)
 376       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 377       //            || c2 < 0)
 378       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 379       //            || c3 < 0)
 380       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 381       //            || c4 < 0)) {
 382       //          currentPosition = temp;
 383       //          return false;
 384       //        }
 385       //
 386       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 387       //        if (currentCharacter != testedChar) {
 388       //          currentPosition = temp;
 389       //          return false;
 390       //        }
 391       //        unicodeAsBackSlash = currentCharacter == '\\';
 392       //
 393       //        //need the unicode buffer
 394       //        if (withoutUnicodePtr == 0) {
 395       //          //buffer all the entries that have been left aside....
 396       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 397       //          System.arraycopy(
 398       //            source,
 399       //            startPosition,
 400       //            withoutUnicodeBuffer,
 401       //            1,
 402       //            withoutUnicodePtr);
 403       //        }
 404       //        //fill the buffer with the char
 405       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 406       //        return true;
 407       //
 408       //      } //-------------end unicode traitement--------------
 409       //      else {
 410       if (currentCharacter != testedChar) {
 411         currentPosition = temp;
 412         return false;
 413       }
 414       unicodeAsBackSlash = false;
 415       //        if (withoutUnicodePtr != 0)
 416       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 417       return true;
 418       //      }
 419     } catch (IndexOutOfBoundsException e) {
 420       unicodeAsBackSlash = false;
 421       currentPosition = temp;
 422       return false;
 423     }
 424   }
 425   public final int getNextChar(char testedChar1, char testedChar2) {
 426     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 427     //test can be done with (x==0) for the first and (x>0) for the second
 428     //handle the case of unicode.
 429     //when a unicode appears then we must use a buffer that holds char internal values
 430     //At the end of this method currentCharacter holds the new visited char
 431     //and currentPosition points right next after it
 432     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 433     //On false, no side effect has occured.
 434
 435     //ALL getNextChar.... ARE OPTIMIZED COPIES
 436
 437     int temp = currentPosition;
 438     try {
 439       int result;
 440       currentCharacter = source[currentPosition++];
 441       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 442       //        && (source[currentPosition] == 'u')) {
 443       //        //-------------unicode traitement ------------
 444       //        int c1, c2, c3, c4;
 445       //        int unicodeSize = 6;
 446       //        currentPosition++;
 447       //        while (source[currentPosition] == 'u') {
 448       //          currentPosition++;
 449       //          unicodeSize++;
 450       //        }
 451       //
 452       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 453       //          || c1 < 0)
 454       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 455       //            || c2 < 0)
 456       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 457       //            || c3 < 0)
 458       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 459       //            || c4 < 0)) {
 460       //          currentPosition = temp;
 461       //          return 2;
 462       //        }
 463       //
 464       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 465       //        if (currentCharacter == testedChar1)
 466       //          result = 0;
 467       //        else if (currentCharacter == testedChar2)
 468       //          result = 1;
 469       //        else {
 470       //          currentPosition = temp;
 471       //          return -1;
 472       //        }
 473       //
 474       //        //need the unicode buffer
 475       //        if (withoutUnicodePtr == 0) {
 476       //          //buffer all the entries that have been left aside....
 477       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 478       //          System.arraycopy(
 479       //            source,
 480       //            startPosition,
 481       //            withoutUnicodeBuffer,
 482       //            1,
 483       //            withoutUnicodePtr);
 484       //        }
 485       //        //fill the buffer with the char
 486       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 487       //        return result;
 488       //      } //-------------end unicode traitement--------------
 489       //      else {
 490       if (currentCharacter == testedChar1)
 491         result = 0;
 492       else if (currentCharacter == testedChar2)
 493         result = 1;
 494       else {
 495         currentPosition = temp;
 496         return -1;
 497       }
 498
 499       //        if (withoutUnicodePtr != 0)
 500       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 501       return result;
 502       //     }
 503     } catch (IndexOutOfBoundsException e) {
 504       currentPosition = temp;
 505       return -1;
 506     }
 507   }
 508   public final boolean getNextCharAsDigit() {
 509     //BOOLEAN
 510     //handle the case of unicode.
 511     //when a unicode appears then we must use a buffer that holds char internal values
 512     //At the end of this method currentCharacter holds the new visited char
 513     //and currentPosition points right next after it
 514     //Both previous lines are true if the currentCharacter is a digit
 515     //On false, no side effect has occured.
 516
 517     //ALL getNextChar.... ARE OPTIMIZED COPIES
 518
 519     int temp = currentPosition;
 520     try {
 521       currentCharacter = source[currentPosition++];
 522       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 523       //        && (source[currentPosition] == 'u')) {
 524       //        //-------------unicode traitement ------------
 525       //        int c1, c2, c3, c4;
 526       //        int unicodeSize = 6;
 527       //        currentPosition++;
 528       //        while (source[currentPosition] == 'u') {
 529       //          currentPosition++;
 530       //          unicodeSize++;
 531       //        }
 532       //
 533       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 534       //          || c1 < 0)
 535       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 536       //            || c2 < 0)
 537       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 538       //            || c3 < 0)
 539       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 540       //            || c4 < 0)) {
 541       //          currentPosition = temp;
 542       //          return false;
 543       //        }
 544       //
 545       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 546       //        if (!Character.isDigit(currentCharacter)) {
 547       //          currentPosition = temp;
 548       //          return false;
 549       //        }
 550       //
 551       //        //need the unicode buffer
 552       //        if (withoutUnicodePtr == 0) {
 553       //          //buffer all the entries that have been left aside....
 554       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 555       //          System.arraycopy(
 556       //            source,
 557       //            startPosition,
 558       //            withoutUnicodeBuffer,
 559       //            1,
 560       //            withoutUnicodePtr);
 561       //        }
 562       //        //fill the buffer with the char
 563       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 564       //        return true;
 565       //      } //-------------end unicode traitement--------------
 566       //      else {
 567       if (!Character.isDigit(currentCharacter)) {
 568         currentPosition = temp;
 569         return false;
 570       }
 571       //        if (withoutUnicodePtr != 0)
 572       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 573       return true;
 574       //      }
 575     } catch (IndexOutOfBoundsException e) {
 576       currentPosition = temp;
 577       return false;
 578     }
 579   }
 580   public final boolean getNextCharAsDigit(int radix) {
 581     //BOOLEAN
 582     //handle the case of unicode.
 583     //when a unicode appears then we must use a buffer that holds char internal values
 584     //At the end of this method currentCharacter holds the new visited char
 585     //and currentPosition points right next after it
 586     //Both previous lines are true if the currentCharacter is a digit base on radix
 587     //On false, no side effect has occured.
 588
 589     //ALL getNextChar.... ARE OPTIMIZED COPIES
 590
 591     int temp = currentPosition;
 592     try {
 593       currentCharacter = source[currentPosition++];
 594       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 595       //        && (source[currentPosition] == 'u')) {
 596       //        //-------------unicode traitement ------------
 597       //        int c1, c2, c3, c4;
 598       //        int unicodeSize = 6;
 599       //        currentPosition++;
 600       //        while (source[currentPosition] == 'u') {
 601       //          currentPosition++;
 602       //          unicodeSize++;
 603       //        }
 604       //
 605       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 606       //          || c1 < 0)
 607       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 608       //            || c2 < 0)
 609       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 610       //            || c3 < 0)
 611       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 612       //            || c4 < 0)) {
 613       //          currentPosition = temp;
 614       //          return false;
 615       //        }
 616       //
 617       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 618       //        if (Character.digit(currentCharacter, radix) == -1) {
 619       //          currentPosition = temp;
 620       //          return false;
 621       //        }
 622       //
 623       //        //need the unicode buffer
 624       //        if (withoutUnicodePtr == 0) {
 625       //          //buffer all the entries that have been left aside....
 626       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 627       //          System.arraycopy(
 628       //            source,
 629       //            startPosition,
 630       //            withoutUnicodeBuffer,
 631       //            1,
 632       //            withoutUnicodePtr);
 633       //        }
 634       //        //fill the buffer with the char
 635       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 636       //        return true;
 637       //      } //-------------end unicode traitement--------------
 638       //      else {
 639       if (Character.digit(currentCharacter, radix) == -1) {
 640         currentPosition = temp;
 641         return false;
 642       }
 643       //        if (withoutUnicodePtr != 0)
 644       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 645       return true;
 646       //      }
 647     } catch (IndexOutOfBoundsException e) {
 648       currentPosition = temp;
 649       return false;
 650     }
 651   }
 652   public boolean getNextCharAsJavaIdentifierPart() {
 653     //BOOLEAN
 654     //handle the case of unicode.
 655     //when a unicode appears then we must use a buffer that holds char internal values
 656     //At the end of this method currentCharacter holds the new visited char
 657     //and currentPosition points right next after it
 658     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 659     //On false, no side effect has occured.
 660
 661     //ALL getNextChar.... ARE OPTIMIZED COPIES
 662
 663     int temp = currentPosition;
 664     try {
 665       currentCharacter = source[currentPosition++];
 666       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 667       //        && (source[currentPosition] == 'u')) {
 668       //        //-------------unicode traitement ------------
 669       //        int c1, c2, c3, c4;
 670       //        int unicodeSize = 6;
 671       //        currentPosition++;
 672       //        while (source[currentPosition] == 'u') {
 673       //          currentPosition++;
 674       //          unicodeSize++;
 675       //        }
 676       //
 677       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 678       //          || c1 < 0)
 679       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 680       //            || c2 < 0)
 681       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 682       //            || c3 < 0)
 683       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 684       //            || c4 < 0)) {
 685       //          currentPosition = temp;
 686       //          return false;
 687       //        }
 688       //
 689       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 690       //        if (!isPHPIdentifierPart(currentCharacter)) {
 691       //          currentPosition = temp;
 692       //          return false;
 693       //        }
 694       //
 695       //        //need the unicode buffer
 696       //        if (withoutUnicodePtr == 0) {
 697       //          //buffer all the entries that have been left aside....
 698       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 699       //          System.arraycopy(
 700       //            source,
 701       //            startPosition,
 702       //            withoutUnicodeBuffer,
 703       //            1,
 704       //            withoutUnicodePtr);
 705       //        }
 706       //        //fill the buffer with the char
 707       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 708       //        return true;
 709       //      } //-------------end unicode traitement--------------
 710       //      else {
 711       if (!isPHPIdentifierPart(currentCharacter)) {
 712         currentPosition = temp;
 713         return false;
 714       }
 715
 716       //        if (withoutUnicodePtr != 0)
 717       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 718       return true;
 719       //      }
 720     } catch (IndexOutOfBoundsException e) {
 721       currentPosition = temp;
 722       return false;
 723     }
 724   }
 725
 726   public int getNextToken() throws InvalidInputException {
 727     int htmlPosition = currentPosition;
 728     try {
 729       while (!phpMode) {
 730         currentCharacter = source[currentPosition++];
 731         if (currentCharacter == '<') {
 732           if (getNextChar('?')) {
 733             currentCharacter = source[currentPosition++];
 734             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
 735               // <?
 736               startPosition = currentPosition;
 737               phpMode = true;
 738               if (tokenizeWhiteSpace) {
 739                 // && (whiteStart != currentPosition - 1)) {
 740                 // reposition scanner in case we are interested in spaces as tokens
 741                 startPosition = htmlPosition;
 742                 return TokenNameHTML;
 743               }
 744             } else {
 745               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
 746               if (phpStart) {
 747                 int test = getNextChar('H', 'h');
 748                 if (test >= 0) {
 749                   test = getNextChar('P', 'p');
 750                   if (test >= 0) {
 751                     // <?PHP  <?php
 752                     startPosition = currentPosition;
 753                     phpMode = true;
 754
 755                     if (tokenizeWhiteSpace) {
 756                       // && (whiteStart != currentPosition - 1)) {
 757                       // reposition scanner in case we are interested in spaces as tokens
 758                       startPosition = htmlPosition;
 759                       return TokenNameHTML;
 760                     }
 761                   }
 762                 }
 763               }
 764             }
 765           }
 766         }
 767
 768         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 769           if (recordLineSeparator) {
 770             pushLineSeparator();
 771           } else {
 772             currentLine = null;
 773           }
 774         }
 775       }
 776     } //-----------------end switch while try--------------------
 777     catch (IndexOutOfBoundsException e) {
 778       if (tokenizeWhiteSpace) {
 779         // && (whiteStart != currentPosition - 1)) {
 780         // reposition scanner in case we are interested in spaces as tokens
 781         startPosition = htmlPosition;
 782       }
 783       return TokenNameEOF;
 784     }
 785
 786     if (phpMode) {
 787       this.wasAcr = false;
 788       if (diet) {
 789         jumpOverMethodBody();
 790         diet = false;
 791         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 792       }
 793       try {
 794         while (true) { //loop for jumping over comments
 795           withoutUnicodePtr = 0;
 796           //start with a new token (even comment written with unicode )
 797
 798           // ---------Consume white space and handles startPosition---------
 799           int whiteStart = currentPosition;
 800           boolean isWhiteSpace;
 801           do {
 802             startPosition = currentPosition;
 803             currentCharacter = source[currentPosition++];
 804             //            if (((currentCharacter = source[currentPosition++]) == '\\')
 805             //              && (source[currentPosition] == 'u')) {
 806             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 807             //            } else {
 808             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 809               checkNonExternalizeString();
 810               if (recordLineSeparator) {
 811                 pushLineSeparator();
 812               } else {
 813                 currentLine = null;
 814               }
 815             }
 816             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 817             //            }
 818           } while (isWhiteSpace);
 819           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 820             // reposition scanner in case we are interested in spaces as tokens
 821             currentPosition--;
 822             startPosition = whiteStart;
 823             return TokenNameWHITESPACE;
 824           }
 825           //little trick to get out in the middle of a source computation
 826           if (currentPosition > eofPosition)
 827             return TokenNameEOF;
 828
 829           // ---------Identify the next token-------------
 830
 831           switch (currentCharacter) {
 832             case '(' :
 833               return TokenNameLPAREN;
 834             case ')' :
 835               return TokenNameRPAREN;
 836             case '{' :
 837               return TokenNameLBRACE;
 838             case '}' :
 839               return TokenNameRBRACE;
 840             case '[' :
 841               return TokenNameLBRACKET;
 842             case ']' :
 843               return TokenNameRBRACKET;
 844             case ';' :
 845               return TokenNameSEMICOLON;
 846             case ',' :
 847               return TokenNameCOMMA;
 848
 849             case '.' :
 850               if (getNextCharAsDigit())
 851                 return scanNumber(true);
 852               return TokenNameDOT;
 853             case '+' :
 854               {
 855                 int test;
 856                 if ((test = getNextChar('+', '=')) == 0)
 857                   return TokenNamePLUS_PLUS;
 858                 if (test > 0)
 859                   return TokenNamePLUS_EQUAL;
 860                 return TokenNamePLUS;
 861               }
 862             case '-' :
 863               {
 864                 int test;
 865                 if ((test = getNextChar('-', '=')) == 0)
 866                   return TokenNameMINUS_MINUS;
 867                 if (test > 0)
 868                   return TokenNameMINUS_EQUAL;
 869                 if (getNextChar('>'))
 870                   return TokenNameMINUS_GREATER;
 871
 872                 return TokenNameMINUS;
 873               }
 874             case '~' :
 875               if (getNextChar('='))
 876                 return TokenNameTWIDDLE_EQUAL;
 877               return TokenNameTWIDDLE;
 878             case '!' :
 879               if (getNextChar('=')) {
 880                 if (getNextChar('=')) {
 881                   return TokenNameNOT_EQUAL_EQUAL;
 882                 }
 883                 return TokenNameNOT_EQUAL;
 884               }
 885               return TokenNameNOT;
 886             case '*' :
 887               if (getNextChar('='))
 888                 return TokenNameMULTIPLY_EQUAL;
 889               return TokenNameMULTIPLY;
 890             case '%' :
 891               if (getNextChar('='))
 892                 return TokenNameREMAINDER_EQUAL;
 893               return TokenNameREMAINDER;
 894             case '<' :
 895               {
 896                 int test;
 897                 if ((test = getNextChar('=', '<')) == 0)
 898                   return TokenNameLESS_EQUAL;
 899                 if (test > 0) {
 900                   if (getNextChar('='))
 901                     return TokenNameLEFT_SHIFT_EQUAL;
 902                   if (getNextChar('<')) {
 903                     int heredocStart = currentPosition;
 904                     int heredocLength = 0;
 905                     currentCharacter = source[currentPosition++];
 906                     if (isPHPIdentifierStart(currentCharacter)) {
 907                       currentCharacter = source[currentPosition++];
 908                     } else {
 909                       return TokenNameERROR;
 910                     }
 911                     while (isPHPIdentifierPart(currentCharacter)) {
 912                       currentCharacter = source[currentPosition++];
 913                     }
 914
 915                     heredocLength = currentPosition - heredocStart - 1;
 916
 917                     // heredoc end-tag determination
 918                     boolean endTag = true;
 919                     char ch;
 920                     do {
 921                       ch = source[currentPosition++];
 922                       if (ch == '\r' || ch == '\n') {
 923                         if (recordLineSeparator) {
 924                           pushLineSeparator();
 925                         } else {
 926                           currentLine = null;
 927                         }
 928                         for (int i = 0; i < heredocLength; i++) {
 929                           if (source[currentPosition + i] != source[heredocStart + i]) {
 930                             endTag = false;
 931                             break;
 932                           }
 933                         }
 934                         if (endTag) {
 935                           currentPosition += heredocLength - 1;
 936                           currentCharacter = source[currentPosition++];
 937                           break; // do...while loop
 938                         } else {
 939                           endTag = true;
 940                         }
 941                       }
 942
 943                     } while (true);
 944
 945                     return TokenNameHEREDOC;
 946                   }
 947                   return TokenNameLEFT_SHIFT;
 948                 }
 949                 return TokenNameLESS;
 950               }
 951             case '>' :
 952               {
 953                 int test;
 954                 if ((test = getNextChar('=', '>')) == 0)
 955                   return TokenNameGREATER_EQUAL;
 956                 if (test > 0) {
 957                   if ((test = getNextChar('=', '>')) == 0)
 958                     return TokenNameRIGHT_SHIFT_EQUAL;
 959                   return TokenNameRIGHT_SHIFT;
 960                 }
 961                 return TokenNameGREATER;
 962               }
 963             case '=' :
 964               if (getNextChar('=')) {
 965                 if (getNextChar('=')) {
 966                   return TokenNameEQUAL_EQUAL_EQUAL;
 967                 }
 968                 return TokenNameEQUAL_EQUAL;
 969               }
 970               if (getNextChar('>'))
 971                 return TokenNameEQUAL_GREATER;
 972               return TokenNameEQUAL;
 973             case '&' :
 974               {
 975                 int test;
 976                 if ((test = getNextChar('&', '=')) == 0)
 977                   return TokenNameAND_AND;
 978                 if (test > 0)
 979                   return TokenNameAND_EQUAL;
 980                 return TokenNameAND;
 981               }
 982             case '|' :
 983               {
 984                 int test;
 985                 if ((test = getNextChar('|', '=')) == 0)
 986                   return TokenNameOR_OR;
 987                 if (test > 0)
 988                   return TokenNameOR_EQUAL;
 989                 return TokenNameOR;
 990               }
 991             case '^' :
 992               if (getNextChar('='))
 993                 return TokenNameXOR_EQUAL;
 994               return TokenNameXOR;
 995             case '?' :
 996               if (getNextChar('>')) {
 997                 phpMode = false;
 998                 return TokenNameStopPHP;
 999               }
1000               return TokenNameQUESTION;
1001             case ':' :
1002               if (getNextChar(':'))
1003                 return TokenNameCOLON_COLON;
1004               return TokenNameCOLON;
1005             case '@' :
1006               return TokenNameAT;
1007               //                                        case '\'' :
1008               //                                                {
1009               //                                                        int test;
1010               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1011               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1012               //                                                        }
1013               //                                                        if (test > 0) {
1014               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1015               //                                                                for (int lookAhead = 0;
1016               //                                                                        lookAhead < 3;
1017               //                                                                        lookAhead++) {
1018               //                                                                        if (currentPosition + lookAhead
1019               //                                                                                == source.length)
1020               //                                                                                break;
1021               //                                                                        if (source[currentPosition + lookAhead]
1022               //                                                                                == '\n')
1023               //                                                                                break;
1024               //                                                                        if (source[currentPosition + lookAhead]
1025               //                                                                                == '\'') {
1026               //                                                                                currentPosition += lookAhead + 1;
1027               //                                                                                break;
1028               //                                                                        }
1029               //                                                                }
1030               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1031               //                                                        }
1032               //                                                }
1033               //                                                if (getNextChar('\'')) {
1034               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1035               //                                                        for (int lookAhead = 0;
1036               //                                                                lookAhead < 3;
1037               //                                                                lookAhead++) {
1038               //                                                                if (currentPosition + lookAhead
1039               //                                                                        == source.length)
1040               //                                                                        break;
1041               //                                                                if (source[currentPosition + lookAhead]
1042               //                                                                        == '\n')
1043               //                                                                        break;
1044               //                                                                if (source[currentPosition + lookAhead]
1045               //                                                                        == '\'') {
1046               //                                                                        currentPosition += lookAhead + 1;
1047               //                                                                        break;
1048               //                                                                }
1049               //                                                        }
1050               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1051               //                                                }
1052               //                                                if (getNextChar('\\'))
1053               //                                                        scanEscapeCharacter();
1054               //                                                else { // consume next character
1055               //                                                        unicodeAsBackSlash = false;
1056               //                                                        if (((currentCharacter = source[currentPosition++])
1057               //                                                                == '\\')
1058               //                                                                && (source[currentPosition] == 'u')) {
1059               //                                                                getNextUnicodeChar();
1060               //                                                        } else {
1061               //                                                                if (withoutUnicodePtr != 0) {
1062               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1063               //                                                                                currentCharacter;
1064               //                                                                }
1065               //                                                        }
1066               //                                                }
1067               //                                                //            if (getNextChar('\''))
1068               //                                                //              return TokenNameCharacterLiteral;
1069               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1070               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1071               //                                                        if (currentPosition + lookAhead == source.length)
1072               //                                                                break;
1073               //                                                        if (source[currentPosition + lookAhead] == '\n')
1074               //                                                                break;
1075               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1076               //                                                                currentPosition += lookAhead + 1;
1077               //                                                                break;
1078               //                                                        }
1079               //                                                }
1080               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1081             case '\'' :
1082               try {
1083                 // consume next character
1084                 unicodeAsBackSlash = false;
1085                 currentCharacter = source[currentPosition++];
1086                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1087                 //                  && (source[currentPosition] == 'u')) {
1088                 //                  getNextUnicodeChar();
1089                 //                } else {
1090                 //                  if (withoutUnicodePtr != 0) {
1091                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1092                 //                      currentCharacter;
1093                 //                  }
1094                 //                }
1095
1096                 while (currentCharacter != '\'') {
1097
1098                   /**** in PHP \r and \n are valid in string literals ****/
1099                   //                  if ((currentCharacter == '\n')
1100                   //                    || (currentCharacter == '\r')) {
1101                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1102                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1103                   //                      if (currentPosition + lookAhead == source.length)
1104                   //                        break;
1105                   //                      if (source[currentPosition + lookAhead] == '\n')
1106                   //                        break;
1107                   //                      if (source[currentPosition + lookAhead] == '\"') {
1108                   //                        currentPosition += lookAhead + 1;
1109                   //                        break;
1110                   //                      }
1111                   //                    }
1112                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1113                   //                  }
1114                   if (currentCharacter == '\\') {
1115                     int escapeSize = currentPosition;
1116                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1117                     //the escape scan below has a side effect on this value, and we need its previous value a few lines further down
1118                     scanSingleQuotedEscapeCharacter();
1119                     escapeSize = currentPosition - escapeSize;
1120                     if (withoutUnicodePtr == 0) {
1121                       //buffer all the entries that have been left aside....
1122                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1123                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1124                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1125                     } else { //overwrite the / in the buffer
1126                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1127                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1128                         withoutUnicodePtr--;
1129                       }
1130                     }
1131                   }
1132                   // consume next character
1133                   unicodeAsBackSlash = false;
1134                   currentCharacter = source[currentPosition++];
1135                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1136                   //                    && (source[currentPosition] == 'u')) {
1137                   //                    getNextUnicodeChar();
1138                   //                  } else {
1139                   if (withoutUnicodePtr != 0) {
1140                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1141                   }
1142                   //                  }
1143
1144                 }
1145               } catch (IndexOutOfBoundsException e) {
1146                 throw new InvalidInputException(UNTERMINATED_STRING);
1147               } catch (InvalidInputException e) {
1148                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1149                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1150                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1151                     if (currentPosition + lookAhead == source.length)
1152                       break;
1153                     if (source[currentPosition + lookAhead] == '\n')
1154                       break;
1155                     if (source[currentPosition + lookAhead] == '\'') {
1156                       currentPosition += lookAhead + 1;
1157                       break;
1158                     }
1159                   }
1160
1161                 }
1162                 throw e; // rethrow
1163               }
1164               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1165                 if (currentLine == null) {
1166                   currentLine = new NLSLine();
1167                   lines.add(currentLine);
1168                 }
1169                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1170               }
1171               return TokenNameStringConstant;
1172             case '"' :
1173               try {
1174                 // consume next character
1175                 unicodeAsBackSlash = false;
1176                 currentCharacter = source[currentPosition++];
1177                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1178                 //                  && (source[currentPosition] == 'u')) {
1179                 //                  getNextUnicodeChar();
1180                 //                } else {
1181                 //                  if (withoutUnicodePtr != 0) {
1182                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1183                 //                      currentCharacter;
1184                 //                  }
1185                 //                }
1186
1187                 while (currentCharacter != '"') {
1188
1189                   /**** in PHP \r and \n are valid in string literals ****/
1190                   //                  if ((currentCharacter == '\n')
1191                   //                    || (currentCharacter == '\r')) {
1192                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1193                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1194                   //                      if (currentPosition + lookAhead == source.length)
1195                   //                        break;
1196                   //                      if (source[currentPosition + lookAhead] == '\n')
1197                   //                        break;
1198                   //                      if (source[currentPosition + lookAhead] == '\"') {
1199                   //                        currentPosition += lookAhead + 1;
1200                   //                        break;
1201                   //                      }
1202                   //                    }
1203                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1204                   //                  }
1205                   if (currentCharacter == '\\') {
1206                     int escapeSize = currentPosition;
1207                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1208                     //the escape scan below has a side effect on this value, and we need its previous value a few lines further down
1209                     scanDoubleQuotedEscapeCharacter();
1210                     escapeSize = currentPosition - escapeSize;
1211                     if (withoutUnicodePtr == 0) {
1212                       //buffer all the entries that have been left aside....
1213                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1214                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1215                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1216                     } else { //overwrite the / in the buffer
1217                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1218                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1219                         withoutUnicodePtr--;
1220                       }
1221                     }
1222                   }
1223                   // consume next character
1224                   unicodeAsBackSlash = false;
1225                   currentCharacter = source[currentPosition++];
1226                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1227                   //                    && (source[currentPosition] == 'u')) {
1228                   //                    getNextUnicodeChar();
1229                   //                  } else {
1230                   if (withoutUnicodePtr != 0) {
1231                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1232                   }
1233                   //                  }
1234
1235                 }
1236               } catch (IndexOutOfBoundsException e) {
1237                 throw new InvalidInputException(UNTERMINATED_STRING);
1238               } catch (InvalidInputException e) {
1239                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1240                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1241                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1242                     if (currentPosition + lookAhead == source.length)
1243                       break;
1244                     if (source[currentPosition + lookAhead] == '\n')
1245                       break;
1246                     if (source[currentPosition + lookAhead] == '\"') {
1247                       currentPosition += lookAhead + 1;
1248                       break;
1249                     }
1250                   }
1251
1252                 }
1253                 throw e; // rethrow
1254               }
1255               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1256                 if (currentLine == null) {
1257                   currentLine = new NLSLine();
1258                   lines.add(currentLine);
1259                 }
1260                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1261               }
1262               return TokenNameStringLiteral;
1263             case '`' :
1264               try {
1265                 // consume next character
1266                 unicodeAsBackSlash = false;
1267                 currentCharacter = source[currentPosition++];
1268                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1269                 //                  && (source[currentPosition] == 'u')) {
1270                 //                  getNextUnicodeChar();
1271                 //                } else {
1272                 //                  if (withoutUnicodePtr != 0) {
1273                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1274                 //                      currentCharacter;
1275                 //                  }
1276                 //                }
1277
1278                 while (currentCharacter != '`') {
1279
1280                   /**** in PHP \r and \n are valid in string literals ****/
1281                   //                if ((currentCharacter == '\n')
1282                   //                  || (currentCharacter == '\r')) {
1283                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1284                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1285                   //                    if (currentPosition + lookAhead == source.length)
1286                   //                      break;
1287                   //                    if (source[currentPosition + lookAhead] == '\n')
1288                   //                      break;
1289                   //                    if (source[currentPosition + lookAhead] == '\"') {
1290                   //                      currentPosition += lookAhead + 1;
1291                   //                      break;
1292                   //                    }
1293                   //                  }
1294                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1295                   //                }
1296                   if (currentCharacter == '\\') {
1297                     int escapeSize = currentPosition;
1298                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1299                     //scanDoubleQuotedEscapeCharacter has a side effect on unicodeAsBackSlash, and its previous value is needed a few lines below
1300                     scanDoubleQuotedEscapeCharacter();
1301                     escapeSize = currentPosition - escapeSize;
1302                     if (withoutUnicodePtr == 0) {
1303                       //buffer all the entries that have been left aside....
1304                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1305                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1306                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1307                     } else { //overwrite the / in the buffer
1308                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1309                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1310                         withoutUnicodePtr--;
1311                       }
1312                     }
1313                   }
1314                   // consume next character
1315                   unicodeAsBackSlash = false;
1316                   currentCharacter = source[currentPosition++];
1317                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1318                   //                    && (source[currentPosition] == 'u')) {
1319                   //                    getNextUnicodeChar();
1320                   //                  } else {
1321                   if (withoutUnicodePtr != 0) {
1322                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1323                   }
1324                   //                  }
1325
1326                 }
1327               } catch (IndexOutOfBoundsException e) {
1328                 throw new InvalidInputException(UNTERMINATED_STRING);
1329               } catch (InvalidInputException e) {
1330                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1331                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1332                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1333                     if (currentPosition + lookAhead == source.length)
1334                       break;
1335                     if (source[currentPosition + lookAhead] == '\n')
1336                       break;
1337                     if (source[currentPosition + lookAhead] == '`') {
1338                       currentPosition += lookAhead + 1;
1339                       break;
1340                     }
1341                   }
1342
1343                 }
1344                 throw e; // rethrow
1345               }
1346               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1347                 if (currentLine == null) {
1348                   currentLine = new NLSLine();
1349                   lines.add(currentLine);
1350                 }
1351                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1352               }
1353               return TokenNameStringInterpolated;
1354             case '#' :
1355             case '/' :
1356               {
1357                 int test;
1358                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1359                   //line comment
1360                   int endPositionForLineComment = 0;
1361                   try { //get the next char
1362                     currentCharacter = source[currentPosition++];
1363                     //                    if (((currentCharacter = source[currentPosition++])
1364                     //                      == '\\')
1365                     //                      && (source[currentPosition] == 'u')) {
1366                     //                      //-------------unicode traitement ------------
1367                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1368                     //                      currentPosition++;
1369                     //                      while (source[currentPosition] == 'u') {
1370                     //                        currentPosition++;
1371                     //                      }
1372                     //                      if ((c1 =
1373                     //                        Character.getNumericValue(source[currentPosition++]))
1374                     //                        > 15
1375                     //                        || c1 < 0
1376                     //                        || (c2 =
1377                     //                          Character.getNumericValue(source[currentPosition++]))
1378                     //                          > 15
1379                     //                        || c2 < 0
1380                     //                        || (c3 =
1381                     //                          Character.getNumericValue(source[currentPosition++]))
1382                     //                          > 15
1383                     //                        || c3 < 0
1384                     //                        || (c4 =
1385                     //                          Character.getNumericValue(source[currentPosition++]))
1386                     //                          > 15
1387                     //                        || c4 < 0) {
1388                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1389                     //                      } else {
1390                     //                        currentCharacter =
1391                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1392                     //                      }
1393                     //                    }
1394
1395                     //handle the \\u case manually into comment
1396                     //                    if (currentCharacter == '\\') {
1397                     //                      if (source[currentPosition] == '\\')
1398                     //                        currentPosition++;
1399                     //                    } //jump over the \\
1400                     boolean isUnicode = false;
1401                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1402                       if (currentCharacter == '?') {
1403                         if (getNextChar('>')) {
1404                           startPosition = currentPosition - 2;
1405                           phpMode = false;
1406                           return TokenNameStopPHP;
1407                         }
1408                       }
1409
1410                       //get the next char
1411                       isUnicode = false;
1412                       currentCharacter = source[currentPosition++];
1413                       //                      if (((currentCharacter = source[currentPosition++])
1414                       //                        == '\\')
1415                       //                        && (source[currentPosition] == 'u')) {
1416                       //                        isUnicode = true;
1417                       //                        //-------------unicode traitement ------------
1418                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1419                       //                        currentPosition++;
1420                       //                        while (source[currentPosition] == 'u') {
1421                       //                          currentPosition++;
1422                       //                        }
1423                       //                        if ((c1 =
1424                       //                          Character.getNumericValue(source[currentPosition++]))
1425                       //                          > 15
1426                       //                          || c1 < 0
1427                       //                          || (c2 =
1428                       //                            Character.getNumericValue(
1429                       //                              source[currentPosition++]))
1430                       //                            > 15
1431                       //                          || c2 < 0
1432                       //                          || (c3 =
1433                       //                            Character.getNumericValue(
1434                       //                              source[currentPosition++]))
1435                       //                            > 15
1436                       //                          || c3 < 0
1437                       //                          || (c4 =
1438                       //                            Character.getNumericValue(
1439                       //                              source[currentPosition++]))
1440                       //                            > 15
1441                       //                          || c4 < 0) {
1442                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1443                       //                        } else {
1444                       //                          currentCharacter =
1445                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1446                       //                        }
1447                       //                      }
1448                       //handle the \\u case manually into comment
1449                       //                      if (currentCharacter == '\\') {
1450                       //                        if (source[currentPosition] == '\\')
1451                       //                          currentPosition++;
1452                       //                      } //jump over the \\
1453                     }
1454                     if (isUnicode) {
1455                       endPositionForLineComment = currentPosition - 6;
1456                     } else {
1457                       endPositionForLineComment = currentPosition - 1;
1458                     }
1459                     recordComment(false);
1460                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1461                       checkNonExternalizeString();
1462                       if (recordLineSeparator) {
1463                         if (isUnicode) {
1464                           pushUnicodeLineSeparator();
1465                         } else {
1466                           pushLineSeparator();
1467                         }
1468                       } else {
1469                         currentLine = null;
1470                       }
1471                     }
1472                     if (tokenizeComments) {
1473                       if (!isUnicode) {
1474                         currentPosition = endPositionForLineComment;
1475                         // reset one character behind
1476                       }
1477                       return TokenNameCOMMENT_LINE;
1478                     }
1479                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1480                     if (tokenizeComments) {
1481                       currentPosition--;
1482                       // reset one character behind
1483                       return TokenNameCOMMENT_LINE;
1484                     }
1485                   }
1486                   break;
1487                 }
1488                 if (test > 0) {
1489                   //traditional and annotation comment
1490                   boolean isJavadoc = false, star = false;
1491                   // consume next character
1492                   unicodeAsBackSlash = false;
1493                   currentCharacter = source[currentPosition++];
1494                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1495                   //                    && (source[currentPosition] == 'u')) {
1496                   //                    getNextUnicodeChar();
1497                   //                  } else {
1498                   //                    if (withoutUnicodePtr != 0) {
1499                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1500                   //                        currentCharacter;
1501                   //                    }
1502                   //                  }
1503
1504                   if (currentCharacter == '*') {
1505                     isJavadoc = true;
1506                     star = true;
1507                   }
1508                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1509                     checkNonExternalizeString();
1510                     if (recordLineSeparator) {
1511                       pushLineSeparator();
1512                     } else {
1513                       currentLine = null;
1514                     }
1515                   }
1516                   try { //get the next char
1517                     currentCharacter = source[currentPosition++];
1518                     //                    if (((currentCharacter = source[currentPosition++])
1519                     //                      == '\\')
1520                     //                      && (source[currentPosition] == 'u')) {
1521                     //                      //-------------unicode traitement ------------
1522                     //                      getNextUnicodeChar();
1523                     //                    }
1524                     //handle the \\u case manually into comment
1525                     //                    if (currentCharacter == '\\') {
1526                     //                      if (source[currentPosition] == '\\')
1527                     //                        currentPosition++;
1528                     //                      //jump over the \\
1529                     //                    }
1530                     // empty comment is not a javadoc /**/
1531                     if (currentCharacter == '/') {
1532                       isJavadoc = false;
1533                     }
1534                     //loop until end of comment */
1535                     while ((currentCharacter != '/') || (!star)) {
1536                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1537                         checkNonExternalizeString();
1538                         if (recordLineSeparator) {
1539                           pushLineSeparator();
1540                         } else {
1541                           currentLine = null;
1542                         }
1543                       }
1544                       star = currentCharacter == '*';
1545                       //get next char
1546                       currentCharacter = source[currentPosition++];
1547                       //                      if (((currentCharacter = source[currentPosition++])
1548                       //                        == '\\')
1549                       //                        && (source[currentPosition] == 'u')) {
1550                       //                        //-------------unicode traitement ------------
1551                       //                        getNextUnicodeChar();
1552                       //                      }
1553                       //handle the \\u case manually into comment
1554                       //                      if (currentCharacter == '\\') {
1555                       //                        if (source[currentPosition] == '\\')
1556                       //                          currentPosition++;
1557                       //                      } //jump over the \\
1558                     }
1559                     recordComment(isJavadoc);
1560                     if (tokenizeComments) {
1561                       if (isJavadoc)
1562                         return TokenNameCOMMENT_PHPDOC;
1563                       return TokenNameCOMMENT_BLOCK;
1564                     }
1565                   } catch (IndexOutOfBoundsException e) {
1566                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1567                   }
1568                   break;
1569                 }
1570                 if (getNextChar('='))
1571                   return TokenNameDIVIDE_EQUAL;
1572                 return TokenNameDIVIDE;
1573               }
1574             case '\u001a' :
1575               if (atEnd())
1576                 return TokenNameEOF;
1577               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1578               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1579
1580             default :
1581               if (currentCharacter == '$') {
1582                 while ((currentCharacter = source[currentPosition++]) == '$') {
1583                 }
1584                 if (currentCharacter == '{')
1585                   return TokenNameDOLLAR_LBRACE;
1586                 if (isPHPIdentifierStart(currentCharacter))
1587                   return scanIdentifierOrKeyword(true);
1588                 return TokenNameERROR;
1589               }
1590               if (isPHPIdentifierStart(currentCharacter))
1591                 return scanIdentifierOrKeyword(false);
1592               if (Character.isDigit(currentCharacter))
1593                 return scanNumber(false);
1594               return TokenNameERROR;
1595           }
1596         }
1597       } //-----------------end switch while try--------------------
1598       catch (IndexOutOfBoundsException e) {
1599       }
1600     }
1601     return TokenNameEOF;
1602   }
1603
1604   //  public final void getNextUnicodeChar()
1605   //    throws IndexOutOfBoundsException, InvalidInputException {
1606   //    //VOID
1607   //    //handle the case of unicode.
1608   //    //when a unicode appears then we must use a buffer that holds char internal values
1609   //    //At the end of this method currentCharacter holds the new visited char
1610   //    //and currentPosition points right next after it
1611   //
1612   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1613   //
1614   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1615   //    currentPosition++;
1616   //    while (source[currentPosition] == 'u') {
1617   //      currentPosition++;
1618   //      unicodeSize++;
1619   //    }
1620   //
1621   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1622   //      || c1 < 0
1623   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1624   //      || c2 < 0
1625   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1626   //      || c3 < 0
1627   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1628   //      || c4 < 0) {
1629   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1630   //    } else {
1631   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1632   //      //need the unicode buffer
1633   //      if (withoutUnicodePtr == 0) {
1634   //        //buffer all the entries that have been left aside....
1635   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1636   //        System.arraycopy(
1637   //          source,
1638   //          startPosition,
1639   //          withoutUnicodeBuffer,
1640   //          1,
1641   //          withoutUnicodePtr);
1642   //      }
1643   //      //fill the buffer with the char
1644   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1645   //    }
1646   //    unicodeAsBackSlash = currentCharacter == '\\';
1647   //  }
1648   /* Skip over a method body by tracking curly-bracket nesting, assuming that curly brackets are properly balanced.
1649    */
1650   public final void jumpOverMethodBody() {
1651
1652     this.wasAcr = false;
1653     int found = 1;
1654     try {
1655       while (true) { //loop for jumping over comments
1656         // ---------Consume white space and handles startPosition---------
1657         boolean isWhiteSpace;
1658         do {
1659           startPosition = currentPosition;
1660           currentCharacter = source[currentPosition++];
1661           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1662           //            && (source[currentPosition] == 'u')) {
1663           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1664           //          } else {
1665           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1666             pushLineSeparator();
1667           isWhiteSpace = Character.isWhitespace(currentCharacter);
1668           //          }
1669         } while (isWhiteSpace);
1670
1671         // -------consume token until } is found---------
1672         switch (currentCharacter) {
1673           case '{' :
1674             found++;
1675             break;
1676           case '}' :
1677             found--;
1678             if (found == 0)
1679               return;
1680             break;
1681           case '\'' :
1682             {
1683               boolean test;
1684               test = getNextChar('\\');
1685               if (test) {
1686                 try {
1687                   scanDoubleQuotedEscapeCharacter();
1688                 } catch (InvalidInputException ex) {
1689                 };
1690               } else {
1691                 //                try { // consume next character
1692                 unicodeAsBackSlash = false;
1693                 currentCharacter = source[currentPosition++];
1694                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1695                 //                    && (source[currentPosition] == 'u')) {
1696                 //                    getNextUnicodeChar();
1697                 //                  } else {
1698                 if (withoutUnicodePtr != 0) {
1699                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1700                 }
1701                 //                  }
1702                 //                } catch (InvalidInputException ex) {
1703                 //                };
1704               }
1705               getNextChar('\'');
1706               break;
1707             }
1708           case '"' :
1709             try {
1710               //              try { // consume next character
1711               unicodeAsBackSlash = false;
1712               currentCharacter = source[currentPosition++];
1713               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1714               //                  && (source[currentPosition] == 'u')) {
1715               //                  getNextUnicodeChar();
1716               //                } else {
1717               if (withoutUnicodePtr != 0) {
1718                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1719               }
1720               //                }
1721               //              } catch (InvalidInputException ex) {
1722               //              };
1723               while (currentCharacter != '"') {
1724                 if (currentCharacter == '\r') {
1725                   if (source[currentPosition] == '\n')
1726                     currentPosition++;
1727                   break;
1728                   // the string cannot go further than the line
1729                 }
1730                 if (currentCharacter == '\n') {
1731                   break;
1732                   // the string cannot go further than the line
1733                 }
1734                 if (currentCharacter == '\\') {
1735                   try {
1736                     scanDoubleQuotedEscapeCharacter();
1737                   } catch (InvalidInputException ex) {
1738                   };
1739                 }
1740                 //                try { // consume next character
1741                 unicodeAsBackSlash = false;
1742                 currentCharacter = source[currentPosition++];
1743                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1744                 //                    && (source[currentPosition] == 'u')) {
1745                 //                    getNextUnicodeChar();
1746                 //                  } else {
1747                 if (withoutUnicodePtr != 0) {
1748                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1749                 }
1750                 //                  }
1751                 //                } catch (InvalidInputException ex) {
1752                 //                };
1753               }
1754             } catch (IndexOutOfBoundsException e) {
1755               return;
1756             }
1757             break;
1758           case '/' :
1759             {
1760               int test;
1761               if ((test = getNextChar('/', '*')) == 0) {
1762                 //line comment
1763                 try {
1764                   //get the next char
1765                   currentCharacter = source[currentPosition++];
1766                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1767                   //                    && (source[currentPosition] == 'u')) {
1768                   //                    //-------------unicode traitement ------------
1769                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1770                   //                    currentPosition++;
1771                   //                    while (source[currentPosition] == 'u') {
1772                   //                      currentPosition++;
1773                   //                    }
1774                   //                    if ((c1 =
1775                   //                      Character.getNumericValue(source[currentPosition++]))
1776                   //                      > 15
1777                   //                      || c1 < 0
1778                   //                      || (c2 =
1779                   //                        Character.getNumericValue(source[currentPosition++]))
1780                   //                        > 15
1781                   //                      || c2 < 0
1782                   //                      || (c3 =
1783                   //                        Character.getNumericValue(source[currentPosition++]))
1784                   //                        > 15
1785                   //                      || c3 < 0
1786                   //                      || (c4 =
1787                   //                        Character.getNumericValue(source[currentPosition++]))
1788                   //                        > 15
1789                   //                      || c4 < 0) {
1790                   //                      //error don't care of the value
1791                   //                      currentCharacter = 'A';
1792                   //                    } //something different from \n and \r
1793                   //                    else {
1794                   //                      currentCharacter =
1795                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1796                   //                    }
1797                   //                  }
1798
1799                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1800                     //get the next char
1801                     currentCharacter = source[currentPosition++];
1802                     //                    if (((currentCharacter = source[currentPosition++])
1803                     //                      == '\\')
1804                     //                      && (source[currentPosition] == 'u')) {
1805                     //                      //-------------unicode traitement ------------
1806                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1807                     //                      currentPosition++;
1808                     //                      while (source[currentPosition] == 'u') {
1809                     //                        currentPosition++;
1810                     //                      }
1811                     //                      if ((c1 =
1812                     //                        Character.getNumericValue(source[currentPosition++]))
1813                     //                        > 15
1814                     //                        || c1 < 0
1815                     //                        || (c2 =
1816                     //                          Character.getNumericValue(source[currentPosition++]))
1817                     //                          > 15
1818                     //                        || c2 < 0
1819                     //                        || (c3 =
1820                     //                          Character.getNumericValue(source[currentPosition++]))
1821                     //                          > 15
1822                     //                        || c3 < 0
1823                     //                        || (c4 =
1824                     //                          Character.getNumericValue(source[currentPosition++]))
1825                     //                          > 15
1826                     //                        || c4 < 0) {
1827                     //                        //error don't care of the value
1828                     //                        currentCharacter = 'A';
1829                     //                      } //something different from \n and \r
1830                     //                      else {
1831                     //                        currentCharacter =
1832                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1833                     //                      }
1834                     //                    }
1835                   }
1836                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1837                     pushLineSeparator();
1838                 } catch (IndexOutOfBoundsException e) {
1839                 } //an eof will them be generated
1840                 break;
1841               }
1842               if (test > 0) {
1843                 //traditional and annotation comment
1844                 boolean star = false;
1845                 //                try { // consume next character
1846                 unicodeAsBackSlash = false;
1847                 currentCharacter = source[currentPosition++];
1848                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1849                 //                    && (source[currentPosition] == 'u')) {
1850                 //                    getNextUnicodeChar();
1851                 //                  } else {
1852                 if (withoutUnicodePtr != 0) {
1853                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1854                 }
1855                 //                  };
1856                 //                } catch (InvalidInputException ex) {
1857                 //                };
1858                 if (currentCharacter == '*') {
1859                   star = true;
1860                 }
1861                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1862                   pushLineSeparator();
1863                 try { //get the next char
1864                   currentCharacter = source[currentPosition++];
1865                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1866                   //                    && (source[currentPosition] == 'u')) {
1867                   //                    //-------------unicode traitement ------------
1868                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1869                   //                    currentPosition++;
1870                   //                    while (source[currentPosition] == 'u') {
1871                   //                      currentPosition++;
1872                   //                    }
1873                   //                    if ((c1 =
1874                   //                      Character.getNumericValue(source[currentPosition++]))
1875                   //                      > 15
1876                   //                      || c1 < 0
1877                   //                      || (c2 =
1878                   //                        Character.getNumericValue(source[currentPosition++]))
1879                   //                        > 15
1880                   //                      || c2 < 0
1881                   //                      || (c3 =
1882                   //                        Character.getNumericValue(source[currentPosition++]))
1883                   //                        > 15
1884                   //                      || c3 < 0
1885                   //                      || (c4 =
1886                   //                        Character.getNumericValue(source[currentPosition++]))
1887                   //                        > 15
1888                   //                      || c4 < 0) {
1889                   //                      //error don't care of the value
1890                   //                      currentCharacter = 'A';
1891                   //                    } //something different from * and /
1892                   //                    else {
1893                   //                      currentCharacter =
1894                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1895                   //                    }
1896                   //                  }
1897                   //loop until end of comment */
1898                   while ((currentCharacter != '/') || (!star)) {
1899                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1900                       pushLineSeparator();
1901                     star = currentCharacter == '*';
1902                     //get next char
1903                     currentCharacter = source[currentPosition++];
1904                     //                    if (((currentCharacter = source[currentPosition++])
1905                     //                      == '\\')
1906                     //                      && (source[currentPosition] == 'u')) {
1907                     //                      //-------------unicode traitement ------------
1908                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1909                     //                      currentPosition++;
1910                     //                      while (source[currentPosition] == 'u') {
1911                     //                        currentPosition++;
1912                     //                      }
1913                     //                      if ((c1 =
1914                     //                        Character.getNumericValue(source[currentPosition++]))
1915                     //                        > 15
1916                     //                        || c1 < 0
1917                     //                        || (c2 =
1918                     //                          Character.getNumericValue(source[currentPosition++]))
1919                     //                          > 15
1920                     //                        || c2 < 0
1921                     //                        || (c3 =
1922                     //                          Character.getNumericValue(source[currentPosition++]))
1923                     //                          > 15
1924                     //                        || c3 < 0
1925                     //                        || (c4 =
1926                     //                          Character.getNumericValue(source[currentPosition++]))
1927                     //                          > 15
1928                     //                        || c4 < 0) {
1929                     //                        //error don't care of the value
1930                     //                        currentCharacter = 'A';
1931                     //                      } //something different from * and /
1932                     //                      else {
1933                     //                        currentCharacter =
1934                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1935                     //                      }
1936                     //                    }
1937                   }
1938                 } catch (IndexOutOfBoundsException e) {
1939                   return;
1940                 }
1941                 break;
1942               }
1943               break;
1944             }
1945
1946           default :
1947             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1948               try {
1949                 scanIdentifierOrKeyword((currentCharacter == '$'));
1950               } catch (InvalidInputException ex) {
1951               };
1952               break;
1953             }
1954             if (Character.isDigit(currentCharacter)) {
1955               try {
1956                 scanNumber(false);
1957               } catch (InvalidInputException ex) {
1958               };
1959               break;
1960             }
1961         }
1962       }
1963       //-----------------end switch while try--------------------
1964     } catch (IndexOutOfBoundsException e) {
1965     } catch (InvalidInputException e) {
1966     }
1967     return;
1968   }
1969   //  public final boolean jumpOverUnicodeWhiteSpace()
1970   //    throws InvalidInputException {
1971   //    //BOOLEAN
1972   //    //handle the case of unicode. Jump over the next whiteSpace
1973   //    //making startPosition pointing on the next available char
1974   //    //On false, the currentCharacter is filled up with a potential
1975   //    //correct char
1976   //
1977   //    try {
1978   //      this.wasAcr = false;
1979   //      int c1, c2, c3, c4;
1980   //      int unicodeSize = 6;
1981   //      currentPosition++;
1982   //      while (source[currentPosition] == 'u') {
1983   //        currentPosition++;
1984   //        unicodeSize++;
1985   //      }
1986   //
1987   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1988   //        || c1 < 0)
1989   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1990   //          || c2 < 0)
1991   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1992   //          || c3 < 0)
1993   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1994   //          || c4 < 0)) {
1995   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1996   //      }
1997   //
1998   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1999   //      if (recordLineSeparator
2000   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2001   //        pushLineSeparator();
2002   //      if (Character.isWhitespace(currentCharacter))
2003   //        return true;
2004   //
2005   //      //buffer the new char which is not a white space
2006   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2007   //      //withoutUnicodePtr == 1 is true here
2008   //      return false;
2009   //    } catch (IndexOutOfBoundsException e) {
2010   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2011   //    }
2012   //  }
2013   public final int[] getLineEnds() {
2014     //return a bounded copy of this.lineEnds
2015
2016     int[] copy;
2017     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2018     return copy;
2019   }
2020
2021   public char[] getSource() {
2022     return this.source;
2023   }
2024   final char[] optimizedCurrentTokenSource1() {
2025     //return always the same char[] build only once
2026
2027     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2028     char charOne = source[startPosition];
2029     switch (charOne) {
2030       case 'a' :
2031         return charArray_a;
2032       case 'b' :
2033         return charArray_b;
2034       case 'c' :
2035         return charArray_c;
2036       case 'd' :
2037         return charArray_d;
2038       case 'e' :
2039         return charArray_e;
2040       case 'f' :
2041         return charArray_f;
2042       case 'g' :
2043         return charArray_g;
2044       case 'h' :
2045         return charArray_h;
2046       case 'i' :
2047         return charArray_i;
2048       case 'j' :
2049         return charArray_j;
2050       case 'k' :
2051         return charArray_k;
2052       case 'l' :
2053         return charArray_l;
2054       case 'm' :
2055         return charArray_m;
2056       case 'n' :
2057         return charArray_n;
2058       case 'o' :
2059         return charArray_o;
2060       case 'p' :
2061         return charArray_p;
2062       case 'q' :
2063         return charArray_q;
2064       case 'r' :
2065         return charArray_r;
2066       case 's' :
2067         return charArray_s;
2068       case 't' :
2069         return charArray_t;
2070       case 'u' :
2071         return charArray_u;
2072       case 'v' :
2073         return charArray_v;
2074       case 'w' :
2075         return charArray_w;
2076       case 'x' :
2077         return charArray_x;
2078       case 'y' :
2079         return charArray_y;
2080       case 'z' :
2081         return charArray_z;
2082       default :
2083         return new char[] { charOne };
2084     }
2085   }
2086
2087   final char[] optimizedCurrentTokenSource2() {
2088     //try to return the same char[] build only once
2089
2090     char c0, c1;
2091     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2092     char[][] table = charArray_length[0][hash];
2093     int i = newEntry2;
2094     while (++i < InternalTableSize) {
2095       char[] charArray = table[i];
2096       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2097         return charArray;
2098     }
2099     //---------other side---------
2100     i = -1;
2101     int max = newEntry2;
2102     while (++i <= max) {
2103       char[] charArray = table[i];
2104       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2105         return charArray;
2106     }
2107     //--------add the entry-------
2108     if (++max >= InternalTableSize)
2109       max = 0;
2110     char[] r;
2111     table[max] = (r = new char[] { c0, c1 });
2112     newEntry2 = max;
2113     return r;
2114   }
2115
2116   final char[] optimizedCurrentTokenSource3() {
2117     //try to return the same char[] build only once
2118
2119     char c0, c1, c2;
2120     int hash =
2121       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2122         % TableSize;
2123     char[][] table = charArray_length[1][hash];
2124     int i = newEntry3;
2125     while (++i < InternalTableSize) {
2126       char[] charArray = table[i];
2127       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2128         return charArray;
2129     }
2130     //---------other side---------
2131     i = -1;
2132     int max = newEntry3;
2133     while (++i <= max) {
2134       char[] charArray = table[i];
2135       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2136         return charArray;
2137     }
2138     //--------add the entry-------
2139     if (++max >= InternalTableSize)
2140       max = 0;
2141     char[] r;
2142     table[max] = (r = new char[] { c0, c1, c2 });
2143     newEntry3 = max;
2144     return r;
2145   }
2146
2147   final char[] optimizedCurrentTokenSource4() {
2148     //try to return the same char[] build only once
2149
2150     char c0, c1, c2, c3;
2151     long hash =
2152       ((((long) (c0 = source[startPosition])) << 18)
2153         + ((c1 = source[startPosition + 1]) << 12)
2154         + ((c2 = source[startPosition + 2]) << 6)
2155         + (c3 = source[startPosition + 3]))
2156         % TableSize;
2157     char[][] table = charArray_length[2][(int) hash];
2158     int i = newEntry4;
2159     while (++i < InternalTableSize) {
2160       char[] charArray = table[i];
2161       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2162         return charArray;
2163     }
2164     //---------other side---------
2165     i = -1;
2166     int max = newEntry4;
2167     while (++i <= max) {
2168       char[] charArray = table[i];
2169       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2170         return charArray;
2171     }
2172     //--------add the entry-------
2173     if (++max >= InternalTableSize)
2174       max = 0;
2175     char[] r;
2176     table[max] = (r = new char[] { c0, c1, c2, c3 });
2177     newEntry4 = max;
2178     return r;
2179
2180   }
2181
2182   final char[] optimizedCurrentTokenSource5() {
2183     //try to return the same char[] build only once
2184
2185     char c0, c1, c2, c3, c4;
2186     long hash =
2187       ((((long) (c0 = source[startPosition])) << 24)
2188         + (((long) (c1 = source[startPosition + 1])) << 18)
2189         + ((c2 = source[startPosition + 2]) << 12)
2190         + ((c3 = source[startPosition + 3]) << 6)
2191         + (c4 = source[startPosition + 4]))
2192         % TableSize;
2193     char[][] table = charArray_length[3][(int) hash];
2194     int i = newEntry5;
2195     while (++i < InternalTableSize) {
2196       char[] charArray = table[i];
2197       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2198         return charArray;
2199     }
2200     //---------other side---------
2201     i = -1;
2202     int max = newEntry5;
2203     while (++i <= max) {
2204       char[] charArray = table[i];
2205       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2206         return charArray;
2207     }
2208     //--------add the entry-------
2209     if (++max >= InternalTableSize)
2210       max = 0;
2211     char[] r;
2212     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2213     newEntry5 = max;
2214     return r;
2215
2216   }
2217
2218   final char[] optimizedCurrentTokenSource6() {
2219     //try to return the same char[] build only once
2220
2221     char c0, c1, c2, c3, c4, c5;
2222     long hash =
2223       ((((long) (c0 = source[startPosition])) << 32)
2224         + (((long) (c1 = source[startPosition + 1])) << 24)
2225         + (((long) (c2 = source[startPosition + 2])) << 18)
2226         + ((c3 = source[startPosition + 3]) << 12)
2227         + ((c4 = source[startPosition + 4]) << 6)
2228         + (c5 = source[startPosition + 5]))
2229         % TableSize;
2230     char[][] table = charArray_length[4][(int) hash];
2231     int i = newEntry6;
2232     while (++i < InternalTableSize) {
2233       char[] charArray = table[i];
2234       if ((c0 == charArray[0])
2235         && (c1 == charArray[1])
2236         && (c2 == charArray[2])
2237         && (c3 == charArray[3])
2238         && (c4 == charArray[4])
2239         && (c5 == charArray[5]))
2240         return charArray;
2241     }
2242     //---------other side---------
2243     i = -1;
2244     int max = newEntry6;
2245     while (++i <= max) {
2246       char[] charArray = table[i];
2247       if ((c0 == charArray[0])
2248         && (c1 == charArray[1])
2249         && (c2 == charArray[2])
2250         && (c3 == charArray[3])
2251         && (c4 == charArray[4])
2252         && (c5 == charArray[5]))
2253         return charArray;
2254     }
2255     //--------add the entry-------
2256     if (++max >= InternalTableSize)
2257       max = 0;
2258     char[] r;
2259     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2260     newEntry6 = max;
2261     return r;
2262   }
2263
2264   public final void pushLineSeparator() throws InvalidInputException {
2265     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2266     final int INCREMENT = 250;
2267
2268     if (this.checkNonExternalizedStringLiterals) {
2269       // reinitialize the current line for non externalize strings purpose
2270       currentLine = null;
2271     }
2272     //currentCharacter is at position currentPosition-1
2273
2274     // cr 000D
2275     if (currentCharacter == '\r') {
2276       int separatorPos = currentPosition - 1;
2277       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2278         return;
2279       //System.out.println("CR-" + separatorPos);
2280       try {
2281         lineEnds[++linePtr] = separatorPos;
2282       } catch (IndexOutOfBoundsException e) {
2283         //linePtr value is correct
2284         int oldLength = lineEnds.length;
2285         int[] old = lineEnds;
2286         lineEnds = new int[oldLength + INCREMENT];
2287         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2288         lineEnds[linePtr] = separatorPos;
2289       }
2290       // look-ahead for merged cr+lf
2291       try {
2292         if (source[currentPosition] == '\n') {
2293           //System.out.println("look-ahead LF-" + currentPosition);
2294           lineEnds[linePtr] = currentPosition;
2295           currentPosition++;
2296           wasAcr = false;
2297         } else {
2298           wasAcr = true;
2299         }
2300       } catch (IndexOutOfBoundsException e) {
2301         wasAcr = true;
2302       }
2303     } else {
2304       // lf 000A
2305       if (currentCharacter == '\n') {
2306         //must merge eventual cr followed by lf
2307         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2308           //System.out.println("merge LF-" + (currentPosition - 1));
2309           lineEnds[linePtr] = currentPosition - 1;
2310         } else {
2311           int separatorPos = currentPosition - 1;
2312           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2313             return;
2314           // System.out.println("LF-" + separatorPos);
2315           try {
2316             lineEnds[++linePtr] = separatorPos;
2317           } catch (IndexOutOfBoundsException e) {
2318             //linePtr value is correct
2319             int oldLength = lineEnds.length;
2320             int[] old = lineEnds;
2321             lineEnds = new int[oldLength + INCREMENT];
2322             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2323             lineEnds[linePtr] = separatorPos;
2324           }
2325         }
2326         wasAcr = false;
2327       }
2328     }
2329   }
2330   public final void pushUnicodeLineSeparator() {
2331     // isUnicode means that the \r or \n has been read as a unicode character
2332
2333     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2334
2335     final int INCREMENT = 250;
2336     //currentCharacter is at position currentPosition-1
2337
2338     if (this.checkNonExternalizedStringLiterals) {
2339       // reinitialize the current line for non externalize strings purpose
2340       currentLine = null;
2341     }
2342
2343     // cr 000D
2344     if (currentCharacter == '\r') {
2345       int separatorPos = currentPosition - 6;
2346       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2347         return;
2348       //System.out.println("CR-" + separatorPos);
2349       try {
2350         lineEnds[++linePtr] = separatorPos;
2351       } catch (IndexOutOfBoundsException e) {
2352         //linePtr value is correct
2353         int oldLength = lineEnds.length;
2354         int[] old = lineEnds;
2355         lineEnds = new int[oldLength + INCREMENT];
2356         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2357         lineEnds[linePtr] = separatorPos;
2358       }
2359       // look-ahead for merged cr+lf
2360       if (source[currentPosition] == '\n') {
2361         //System.out.println("look-ahead LF-" + currentPosition);
2362         lineEnds[linePtr] = currentPosition;
2363         currentPosition++;
2364         wasAcr = false;
2365       } else {
2366         wasAcr = true;
2367       }
2368     } else {
2369       // lf 000A
2370       if (currentCharacter == '\n') {
2371         //must merge eventual cr followed by lf
2372         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2373           //System.out.println("merge LF-" + (currentPosition - 1));
2374           lineEnds[linePtr] = currentPosition - 6;
2375         } else {
2376           int separatorPos = currentPosition - 6;
2377           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2378             return;
2379           // System.out.println("LF-" + separatorPos);
2380           try {
2381             lineEnds[++linePtr] = separatorPos;
2382           } catch (IndexOutOfBoundsException e) {
2383             //linePtr value is correct
2384             int oldLength = lineEnds.length;
2385             int[] old = lineEnds;
2386             lineEnds = new int[oldLength + INCREMENT];
2387             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2388             lineEnds[linePtr] = separatorPos;
2389           }
2390         }
2391         wasAcr = false;
2392       }
2393     }
2394   }
2395   public final void recordComment(boolean isJavadoc) {
2396
2397     // a new annotation comment is recorded
2398     try {
2399       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400     } catch (IndexOutOfBoundsException e) {
2401       int oldStackLength = commentStops.length;
2402       int[] oldStack = commentStops;
2403       commentStops = new int[oldStackLength + 30];
2404       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2405       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2406       //grows the positions buffers too
2407       int[] old = commentStarts;
2408       commentStarts = new int[oldStackLength + 30];
2409       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2410     }
2411
2412     //the buffer is of a correct size here
2413     commentStarts[commentPtr] = startPosition;
2414   }
2415   public void resetTo(int begin, int end) {
2416     //reset the scanner to a given position where it may rescan again
2417
2418     diet = false;
2419     initialPosition = startPosition = currentPosition = begin;
2420     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2421     commentPtr = -1; // reset comment stack
2422   }
2423
2424   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2425     // the string with "\\u" is a legal string of two chars \ and u
2426     //thus we use a direct access to the source (for regular cases).
2427
2428     //    if (unicodeAsBackSlash) {
2429     //      // consume next character
2430     //      unicodeAsBackSlash = false;
2431     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2432     //        && (source[currentPosition] == 'u')) {
2433     //        getNextUnicodeChar();
2434     //      } else {
2435     //        if (withoutUnicodePtr != 0) {
2436     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2437     //        }
2438     //      }
2439     //    } else
2440     currentCharacter = source[currentPosition++];
2441     switch (currentCharacter) {
2442       case '\'' :
2443         currentCharacter = '\'';
2444         break;
2445       case '\\' :
2446         currentCharacter = '\\';
2447         break;
2448       default :
2449         currentCharacter = '\\';
2450         currentPosition--;
2451     }
2452   }
2453
2454   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2455     // the string with "\\u" is a legal string of two chars \ and u
2456     //thus we use a direct access to the source (for regular cases).
2457
2458     //    if (unicodeAsBackSlash) {
2459     //      // consume next character
2460     //      unicodeAsBackSlash = false;
2461     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2462     //        && (source[currentPosition] == 'u')) {
2463     //        getNextUnicodeChar();
2464     //      } else {
2465     //        if (withoutUnicodePtr != 0) {
2466     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2467     //        }
2468     //      }
2469     //    } else
2470     currentCharacter = source[currentPosition++];
2471     switch (currentCharacter) {
2472       //      case 'b' :
2473       //        currentCharacter = '\b';
2474       //        break;
2475       case 't' :
2476         currentCharacter = '\t';
2477         break;
2478       case 'n' :
2479         currentCharacter = '\n';
2480         break;
2481         //      case 'f' :
2482         //        currentCharacter = '\f';
2483         //        break;
2484       case 'r' :
2485         currentCharacter = '\r';
2486         break;
2487       case '\"' :
2488         currentCharacter = '\"';
2489         break;
2490       case '\'' :
2491         currentCharacter = '\'';
2492         break;
2493       case '\\' :
2494         currentCharacter = '\\';
2495         break;
2496       case '$' :
2497         currentCharacter = '$';
2498         break;
2499       default :
2500         // -----------octal escape--------------
2501         // OctalDigit
2502         // OctalDigit OctalDigit
2503         // ZeroToThree OctalDigit OctalDigit
2504
2505         int number = Character.getNumericValue(currentCharacter);
2506         if (number >= 0 && number <= 7) {
2507           boolean zeroToThreeNot = number > 3;
2508           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2509             int digit = Character.getNumericValue(currentCharacter);
2510             if (digit >= 0 && digit <= 7) {
2511               number = (number * 8) + digit;
2512               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2513                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2514                   currentPosition--;
2515                 } else {
2516                   digit = Character.getNumericValue(currentCharacter);
2517                   if (digit >= 0 && digit <= 7) {
2518                     // has read \ZeroToThree OctalDigit OctalDigit
2519                     number = (number * 8) + digit;
2520                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2521                     currentPosition--;
2522                   }
2523                 }
2524               } else { // has read \OctalDigit NonDigit--> ignore last character
2525                 currentPosition--;
2526               }
2527             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2528               currentPosition--;
2529             }
2530           } else { // has read \OctalDigit --> ignore last character
2531             currentPosition--;
2532           }
2533           if (number > 255)
2534             throw new InvalidInputException(INVALID_ESCAPE);
2535           currentCharacter = (char) number;
2536         }
2537         //else
2538         //     throw new InvalidInputException(INVALID_ESCAPE);
2539     }
2540   }
2541
2542   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2543   //    return scanIdentifierOrKeyword( false );
2544   //  }
2545
2546   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2547     //test keywords
2548
2549     //first dispatch on the first char.
2550     //then the length. If there are several
2551     //keywors with the same length AND the same first char, then do another
2552     //disptach on the second char :-)...cool....but fast !
2553
2554     useAssertAsAnIndentifier = false;
2555
2556     while (getNextCharAsJavaIdentifierPart()) {
2557     };
2558
2559     if (isVariable) {
2560       if (new String(getCurrentTokenSource()).equals("$this")) {
2561         return TokenNamethis;
2562       }
2563       return TokenNameVariable;
2564     }
2565     int index, length;
2566     char[] data;
2567     char firstLetter;
2568     //    if (withoutUnicodePtr == 0)
2569
2570     //quick test on length == 1 but not on length > 12 while most identifier
2571     //have a length which is <= 12...but there are lots of identifier with
2572     //only one char....
2573
2574     //      {
2575     if ((length = currentPosition - startPosition) == 1)
2576       return TokenNameIdentifier;
2577     //  data = source;
2578     data = new char[length];
2579     index = startPosition;
2580     for (int i = 0; i < length; i++) {
2581       data[i] = Character.toLowerCase(source[index + i]);
2582     }
2583     index = 0;
2584     //    } else {
2585     //      if ((length = withoutUnicodePtr) == 1)
2586     //        return TokenNameIdentifier;
2587     //      // data = withoutUnicodeBuffer;
2588     //      data = new char[withoutUnicodeBuffer.length];
2589     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2590     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2591     //      }
2592     //      index = 1;
2593     //    }
2594
2595     firstLetter = data[index];
2596     switch (firstLetter) {
2597
2598       case 'a' : // as and array
2599         switch (length) {
2600           case 2 : //as
2601             if ((data[++index] == 's')) {
2602               return TokenNameas;
2603             } else {
2604               return TokenNameIdentifier;
2605             }
2606           case 3 : //and
2607             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2608               return TokenNameAND;
2609             } else {
2610               return TokenNameIdentifier;
2611             }
2612             //          case 5 :
2613             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2614             //              return TokenNamearray;
2615             //            else
2616             //              return TokenNameIdentifier;
2617           default :
2618             return TokenNameIdentifier;
2619         }
2620       case 'b' : //break
2621         switch (length) {
2622           case 5 :
2623             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2624               return TokenNamebreak;
2625             else
2626               return TokenNameIdentifier;
2627           default :
2628             return TokenNameIdentifier;
2629         }
2630
2631       case 'c' : //case class continue
2632         switch (length) {
2633           case 4 :
2634             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2635               return TokenNamecase;
2636             else
2637               return TokenNameIdentifier;
2638           case 5 :
2639             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2640               return TokenNameclass;
2641             else
2642               return TokenNameIdentifier;
2643           case 8 :
2644             if ((data[++index] == 'o')
2645               && (data[++index] == 'n')
2646               && (data[++index] == 't')
2647               && (data[++index] == 'i')
2648               && (data[++index] == 'n')
2649               && (data[++index] == 'u')
2650               && (data[++index] == 'e'))
2651               return TokenNamecontinue;
2652             else
2653               return TokenNameIdentifier;
2654           default :
2655             return TokenNameIdentifier;
2656         }
2657
2658       case 'd' : //define default do
2659         switch (length) {
2660           case 2 :
2661             if ((data[++index] == 'o'))
2662               return TokenNamedo;
2663             else
2664               return TokenNameIdentifier;
2665           case 6 :
2666             if ((data[++index] == 'e')
2667               && (data[++index] == 'f')
2668               && (data[++index] == 'i')
2669               && (data[++index] == 'n')
2670               && (data[++index] == 'e'))
2671               return TokenNamedefine;
2672             else
2673               return TokenNameIdentifier;
2674           case 7 :
2675             if ((data[++index] == 'e')
2676               && (data[++index] == 'f')
2677               && (data[++index] == 'a')
2678               && (data[++index] == 'u')
2679               && (data[++index] == 'l')
2680               && (data[++index] == 't'))
2681               return TokenNamedefault;
2682             else
2683               return TokenNameIdentifier;
2684           default :
2685             return TokenNameIdentifier;
2686         }
2687       case 'e' : //echo else elseif extends
2688         switch (length) {
2689           case 4 :
2690             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2691               return TokenNameecho;
2692             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2693               return TokenNameelse;
2694             else
2695               return TokenNameIdentifier;
2696           case 5 : // endif
2697             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2698               return TokenNameendif;
2699             else
2700               return TokenNameIdentifier;
2701           case 6 : // endfor
2702             if ((data[++index] == 'n')
2703               && (data[++index] == 'd')
2704               && (data[++index] == 'f')
2705               && (data[++index] == 'o')
2706               && (data[++index] == 'r'))
2707               return TokenNameendfor;
2708             else if (
2709               (data[index] == 'l')
2710                 && (data[++index] == 's')
2711                 && (data[++index] == 'e')
2712                 && (data[++index] == 'i')
2713                 && (data[++index] == 'f'))
2714               return TokenNameelseif;
2715             else
2716               return TokenNameIdentifier;
2717           case 7 :
2718             if ((data[++index] == 'x')
2719               && (data[++index] == 't')
2720               && (data[++index] == 'e')
2721               && (data[++index] == 'n')
2722               && (data[++index] == 'd')
2723               && (data[++index] == 's'))
2724               return TokenNameextends;
2725             else
2726               return TokenNameIdentifier;
2727           case 8 : // endwhile
2728             if ((data[++index] == 'n')
2729               && (data[++index] == 'd')
2730               && (data[++index] == 'w')
2731               && (data[++index] == 'h')
2732               && (data[++index] == 'i')
2733               && (data[++index] == 'l')
2734               && (data[++index] == 'e'))
2735               return TokenNameendwhile;
2736             else
2737               return TokenNameIdentifier;
2738           case 9 : // endswitch
2739             if ((data[++index] == 'n')
2740               && (data[++index] == 'd')
2741               && (data[++index] == 's')
2742               && (data[++index] == 'w')
2743               && (data[++index] == 'i')
2744               && (data[++index] == 't')
2745               && (data[++index] == 'c')
2746               && (data[++index] == 'h'))
2747               return TokenNameendswitch;
2748             else
2749               return TokenNameIdentifier;
2750           case 10 : // endforeach
2751             if ((data[++index] == 'n')
2752               && (data[++index] == 'd')
2753               && (data[++index] == 'f')
2754               && (data[++index] == 'o')
2755               && (data[++index] == 'r')
2756               && (data[++index] == 'e')
2757               && (data[++index] == 'a')
2758               && (data[++index] == 'c')
2759               && (data[++index] == 'h'))
2760               return TokenNameendforeach;
2761             else
2762               return TokenNameIdentifier;
2763
2764           default :
2765             return TokenNameIdentifier;
2766         }
2767
2768       case 'f' : //for false function
2769         switch (length) {
2770           case 3 :
2771             if ((data[++index] == 'o') && (data[++index] == 'r'))
2772               return TokenNamefor;
2773             else
2774               return TokenNameIdentifier;
2775           case 5 :
2776             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2777               return TokenNamefalse;
2778             else
2779               return TokenNameIdentifier;
2780           case 7 : // function
2781             if ((data[++index] == 'o')
2782               && (data[++index] == 'r')
2783               && (data[++index] == 'e')
2784               && (data[++index] == 'a')
2785               && (data[++index] == 'c')
2786               && (data[++index] == 'h'))
2787               return TokenNameforeach;
2788             else
2789               return TokenNameIdentifier;
2790           case 8 : // function
2791             if ((data[++index] == 'u')
2792               && (data[++index] == 'n')
2793               && (data[++index] == 'c')
2794               && (data[++index] == 't')
2795               && (data[++index] == 'i')
2796               && (data[++index] == 'o')
2797               && (data[++index] == 'n'))
2798               return TokenNamefunction;
2799             else
2800               return TokenNameIdentifier;
2801           default :
2802             return TokenNameIdentifier;
2803         }
2804       case 'g' : //global
2805         if (length == 6) {
2806           if ((data[++index] == 'l')
2807             && (data[++index] == 'o')
2808             && (data[++index] == 'b')
2809             && (data[++index] == 'a')
2810             && (data[++index] == 'l')) {
2811             return TokenNameglobal;
2812           }
2813         }
2814         return TokenNameIdentifier;
2815
2816       case 'i' : //if int
2817         switch (length) {
2818           case 2 :
2819             if (data[++index] == 'f')
2820               return TokenNameif;
2821             else
2822               return TokenNameIdentifier;
2823             //          case 3 :
2824             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2825             //              return TokenNameint;
2826             //            else
2827             //              return TokenNameIdentifier;
2828           case 7 :
2829             if ((data[++index] == 'n')
2830               && (data[++index] == 'c')
2831               && (data[++index] == 'l')
2832               && (data[++index] == 'u')
2833               && (data[++index] == 'd')
2834               && (data[++index] == 'e'))
2835               return TokenNameinclude;
2836             else
2837               return TokenNameIdentifier;
2838           case 12 :
2839             if ((data[++index] == 'n')
2840               && (data[++index] == 'c')
2841               && (data[++index] == 'l')
2842               && (data[++index] == 'u')
2843               && (data[++index] == 'd')
2844               && (data[++index] == 'e')
2845               && (data[++index] == '_')
2846               && (data[++index] == 'o')
2847               && (data[++index] == 'n')
2848               && (data[++index] == 'c')
2849               && (data[++index] == 'e'))
2850               return TokenNameinclude_once;
2851             else
2852               return TokenNameIdentifier;
2853           default :
2854             return TokenNameIdentifier;
2855         }
2856
2857       case 'l' : //list
2858         if (length == 4) {
2859           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2860             return TokenNamelist;
2861           }
2862         }
2863         return TokenNameIdentifier;
2864
2865       case 'n' : // new null
2866         switch (length) {
2867           case 3 :
2868             if ((data[++index] == 'e') && (data[++index] == 'w'))
2869               return TokenNamenew;
2870             else
2871               return TokenNameIdentifier;
2872           case 4 :
2873             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2874               return TokenNamenull;
2875             else
2876               return TokenNameIdentifier;
2877
2878           default :
2879             return TokenNameIdentifier;
2880         }
2881       case 'o' : // or old_function
2882         if (length == 2) {
2883           if (data[++index] == 'r') {
2884             return TokenNameOR;
2885           }
2886         }
2887         //        if (length == 12) {
2888         //          if ((data[++index] == 'l')
2889         //            && (data[++index] == 'd')
2890         //            && (data[++index] == '_')
2891         //            && (data[++index] == 'f')
2892         //            && (data[++index] == 'u')
2893         //            && (data[++index] == 'n')
2894         //            && (data[++index] == 'c')
2895         //            && (data[++index] == 't')
2896         //            && (data[++index] == 'i')
2897         //            && (data[++index] == 'o')
2898         //            && (data[++index] == 'n')) {
2899         //            return TokenNameold_function;
2900         //          }
2901         //        }
2902         return TokenNameIdentifier;
2903
2904       case 'p' : // print
2905         if (length == 5) {
2906           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2907             return TokenNameprint;
2908           }
2909         }
2910         return TokenNameIdentifier;
2911       case 'r' : //return require require_once
2912         if (length == 6) {
2913           if ((data[++index] == 'e')
2914             && (data[++index] == 't')
2915             && (data[++index] == 'u')
2916             && (data[++index] == 'r')
2917             && (data[++index] == 'n')) {
2918             return TokenNamereturn;
2919           }
2920         } else if (length == 7) {
2921           if ((data[++index] == 'e')
2922             && (data[++index] == 'q')
2923             && (data[++index] == 'u')
2924             && (data[++index] == 'i')
2925             && (data[++index] == 'r')
2926             && (data[++index] == 'e')) {
2927             return TokenNamerequire;
2928           }
2929         } else if (length == 12) {
2930           if ((data[++index] == 'e')
2931             && (data[++index] == 'q')
2932             && (data[++index] == 'u')
2933             && (data[++index] == 'i')
2934             && (data[++index] == 'r')
2935             && (data[++index] == 'e')
2936             && (data[++index] == '_')
2937             && (data[++index] == 'o')
2938             && (data[++index] == 'n')
2939             && (data[++index] == 'c')
2940             && (data[++index] == 'e')) {
2941             return TokenNamerequire_once;
2942           }
2943         } else
2944           return TokenNameIdentifier;
2945
2946       case 's' : //static switch
2947         switch (length) {
2948           case 6 :
2949             if (data[++index] == 't')
2950               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2951                 return TokenNamestatic;
2952               } else
2953                 return TokenNameIdentifier;
2954             else if (
2955               (data[index] == 'w')
2956                 && (data[++index] == 'i')
2957                 && (data[++index] == 't')
2958                 && (data[++index] == 'c')
2959                 && (data[++index] == 'h'))
2960               return TokenNameswitch;
2961             else
2962               return TokenNameIdentifier;
2963           default :
2964             return TokenNameIdentifier;
2965         }
2966
2967       case 't' : // true
2968         switch (length) {
2969
2970           case 4 :
2971             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2972               return TokenNametrue;
2973             else
2974               return TokenNameIdentifier;
2975             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2976             //              return TokenNamethis;
2977
2978           default :
2979             return TokenNameIdentifier;
2980         }
2981
2982       case 'v' : //var
2983         switch (length) {
2984           case 3 :
2985             if ((data[++index] == 'a') && (data[++index] == 'r'))
2986               return TokenNamevar;
2987             else
2988               return TokenNameIdentifier;
2989
2990           default :
2991             return TokenNameIdentifier;
2992         }
2993
2994       case 'w' : //while
2995         switch (length) {
2996           case 5 :
2997             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2998               return TokenNamewhile;
2999             else
3000               return TokenNameIdentifier;
3001             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3002             //return TokenNamewidefp ;
3003             //else
3004             //return TokenNameIdentifier;
3005           default :
3006             return TokenNameIdentifier;
3007         }
3008
3009       case 'x' : //xor
3010         switch (length) {
3011           case 3 :
3012             if ((data[++index] == 'o') && (data[++index] == 'r'))
3013               return TokenNameXOR;
3014             else
3015               return TokenNameIdentifier;
3016
3017           default :
3018             return TokenNameIdentifier;
3019         }
3020       default :
3021         return TokenNameIdentifier;
3022     }
3023   }
3024   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3025
3026     //when entering this method the currentCharacter is the firt
3027     //digit of the number , i.e. it may be preceeded by a . when
3028     //dotPrefix is true
3029
3030     boolean floating = dotPrefix;
3031     if ((!dotPrefix) && (currentCharacter == '0')) {
3032       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3033         //force the first char of the hexa number do exist...
3034         // consume next character
3035         unicodeAsBackSlash = false;
3036         currentCharacter = source[currentPosition++];
3037         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3038         //          && (source[currentPosition] == 'u')) {
3039         //          getNextUnicodeChar();
3040         //        } else {
3041         //          if (withoutUnicodePtr != 0) {
3042         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3043         //          }
3044         //        }
3045         if (Character.digit(currentCharacter, 16) == -1)
3046           throw new InvalidInputException(INVALID_HEXA);
3047         //---end forcing--
3048         while (getNextCharAsDigit(16)) {
3049         };
3050         //        if (getNextChar('l', 'L') >= 0)
3051         //          return TokenNameLongLiteral;
3052         //        else
3053         return TokenNameIntegerLiteral;
3054       }
3055
3056       //there is x or X in the number
3057       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3058       if (getNextCharAsDigit()) {
3059         //-------------potential octal-----------------
3060         while (getNextCharAsDigit()) {
3061         };
3062
3063         //        if (getNextChar('l', 'L') >= 0) {
3064         //          return TokenNameLongLiteral;
3065         //        }
3066         //
3067         //        if (getNextChar('f', 'F') >= 0) {
3068         //          return TokenNameFloatingPointLiteral;
3069         //        }
3070
3071         if (getNextChar('d', 'D') >= 0) {
3072           return TokenNameDoubleLiteral;
3073         } else { //make the distinction between octal and float ....
3074           if (getNextChar('.')) { //bingo ! ....
3075             while (getNextCharAsDigit()) {
3076             };
3077             if (getNextChar('e', 'E') >= 0) {
3078               // consume next character
3079               unicodeAsBackSlash = false;
3080               currentCharacter = source[currentPosition++];
3081               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3082               //                && (source[currentPosition] == 'u')) {
3083               //                getNextUnicodeChar();
3084               //              } else {
3085               //                if (withoutUnicodePtr != 0) {
3086               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3087               //                }
3088               //              }
3089
3090               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3091                 // consume next character
3092                 unicodeAsBackSlash = false;
3093                 currentCharacter = source[currentPosition++];
3094                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3095                 //                  && (source[currentPosition] == 'u')) {
3096                 //                  getNextUnicodeChar();
3097                 //                } else {
3098                 //                  if (withoutUnicodePtr != 0) {
3099                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3100                 //                      currentCharacter;
3101                 //                  }
3102                 //                }
3103               }
3104               if (!Character.isDigit(currentCharacter))
3105                 throw new InvalidInputException(INVALID_FLOAT);
3106               while (getNextCharAsDigit()) {
3107               };
3108             }
3109             //            if (getNextChar('f', 'F') >= 0)
3110             //              return TokenNameFloatingPointLiteral;
3111             getNextChar('d', 'D'); //jump over potential d or D
3112             return TokenNameDoubleLiteral;
3113           } else {
3114             return TokenNameIntegerLiteral;
3115           }
3116         }
3117       } else {
3118         /* carry on */
3119       }
3120     }
3121
3122     while (getNextCharAsDigit()) {
3123     };
3124
3125     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3126     //      return TokenNameLongLiteral;
3127
3128     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3129       while (getNextCharAsDigit()) {
3130       };
3131       floating = true;
3132     }
3133
3134     //if floating is true both exponant and suffix may be optional
3135
3136     if (getNextChar('e', 'E') >= 0) {
3137       floating = true;
3138       // consume next character
3139       unicodeAsBackSlash = false;
3140       currentCharacter = source[currentPosition++];
3141       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3142       //        && (source[currentPosition] == 'u')) {
3143       //        getNextUnicodeChar();
3144       //      } else {
3145       //        if (withoutUnicodePtr != 0) {
3146       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3147       //        }
3148       //      }
3149
3150       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3151         unicodeAsBackSlash = false;
3152         currentCharacter = source[currentPosition++];
3153         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3154         //          && (source[currentPosition] == 'u')) {
3155         //          getNextUnicodeChar();
3156         //        } else {
3157         //          if (withoutUnicodePtr != 0) {
3158         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3159         //          }
3160         //        }
3161       }
3162       if (!Character.isDigit(currentCharacter))
3163         throw new InvalidInputException(INVALID_FLOAT);
3164       while (getNextCharAsDigit()) {
3165       };
3166     }
3167
3168     if (getNextChar('d', 'D') >= 0)
3169       return TokenNameDoubleLiteral;
3170     //    if (getNextChar('f', 'F') >= 0)
3171     //      return TokenNameFloatingPointLiteral;
3172
3173     //the long flag has been tested before
3174
3175     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3176   }
3177   /**
3178    * Search the line number corresponding to a specific position
3179    *
3180    */
3181   public final int getLineNumber(int position) {
3182
3183     if (lineEnds == null)
3184       return 1;
3185     int length = linePtr + 1;
3186     if (length == 0)
3187       return 1;
3188     int g = 0, d = length - 1;
3189     int m = 0;
3190     while (g <= d) {
3191       m = (g + d) / 2;
3192       if (position < lineEnds[m]) {
3193         d = m - 1;
3194       } else if (position > lineEnds[m]) {
3195         g = m + 1;
3196       } else {
3197         return m + 1;
3198       }
3199     }
3200     if (position < lineEnds[m]) {
3201       return m + 1;
3202     }
3203     return m + 2;
3204   }
3205
3206   public void setPHPMode(boolean mode) {
3207     phpMode = mode;
3208   }
3209
3210   public final void setSource(char[] source) {
3211     //the source-buffer is set to sourceString
3212
3213     if (source == null) {
3214       this.source = new char[0];
3215     } else {
3216       this.source = source;
3217     }
3218     startPosition = -1;
3219     initialPosition = currentPosition = 0;
3220     containsAssertKeyword = false;
3221     withoutUnicodeBuffer = new char[this.source.length];
3222
3223   }
3224
3225   public String toString() {
3226     if (startPosition == source.length)
3227       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3228     if (currentPosition > source.length)
3229       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3230
3231     char front[] = new char[startPosition];
3232     System.arraycopy(source, 0, front, 0, startPosition);
3233
3234     int middleLength = (currentPosition - 1) - startPosition + 1;
3235     char middle[];
3236     if (middleLength > -1) {
3237       middle = new char[middleLength];
3238       System.arraycopy(source, startPosition, middle, 0, middleLength);
3239     } else {
3240       middle = new char[0];
3241     }
3242
3243     char end[] = new char[source.length - (currentPosition - 1)];
3244     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3245
3246     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3247     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3248     + new String(end);
3249   }
3250   public final String toStringAction(int act) {
3251
3252     switch (act) {
3253       case TokenNameERROR :
3254         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3255       case TokenNameStopPHP :
3256         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3257       case TokenNameIdentifier :
3258         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3259       case TokenNameVariable :
3260         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3261       case TokenNameas :
3262         return "as"; //$NON-NLS-1$
3263       case TokenNamebreak :
3264         return "break"; //$NON-NLS-1$
3265       case TokenNamecase :
3266         return "case"; //$NON-NLS-1$
3267       case TokenNameclass :
3268         return "class"; //$NON-NLS-1$
3269       case TokenNamecontinue :
3270         return "continue"; //$NON-NLS-1$
3271       case TokenNamedefault :
3272         return "default"; //$NON-NLS-1$
3273       case TokenNamedefine :
3274         return "define"; //$NON-NLS-1$
3275       case TokenNamedo :
3276         return "do"; //$NON-NLS-1$
3277       case TokenNameecho :
3278         return "echo"; //$NON-NLS-1$
3279       case TokenNameelse :
3280         return "else"; //$NON-NLS-1$
3281       case TokenNameelseif :
3282         return "elseif"; //$NON-NLS-1$
3283       case TokenNameendfor :
3284         return "endfor"; //$NON-NLS-1$
3285       case TokenNameendforeach :
3286         return "endforeach"; //$NON-NLS-1$
3287       case TokenNameendif :
3288         return "endif"; //$NON-NLS-1$
3289       case TokenNameendswitch :
3290         return "endswitch"; //$NON-NLS-1$
3291       case TokenNameendwhile :
3292         return "endwhile"; //$NON-NLS-1$
3293       case TokenNameextends :
3294         return "extends"; //$NON-NLS-1$
3295       case TokenNamefalse :
3296         return "false"; //$NON-NLS-1$
3297       case TokenNamefor :
3298         return "for"; //$NON-NLS-1$
3299       case TokenNameforeach :
3300         return "foreach"; //$NON-NLS-1$
3301       case TokenNamefunction :
3302         return "function"; //$NON-NLS-1$
3303       case TokenNameglobal :
3304         return "global"; //$NON-NLS-1$
3305       case TokenNameif :
3306         return "if"; //$NON-NLS-1$
3307       case TokenNameinclude :
3308         return "include"; //$NON-NLS-1$
3309       case TokenNameinclude_once :
3310         return "include_once"; //$NON-NLS-1$
3311       case TokenNamelist :
3312         return "list"; //$NON-NLS-1$
3313       case TokenNamenew :
3314         return "new"; //$NON-NLS-1$
3315       case TokenNamenull :
3316         return "null"; //$NON-NLS-1$
3317       case TokenNameprint :
3318         return "print"; //$NON-NLS-1$
3319       case TokenNamerequire :
3320         return "require"; //$NON-NLS-1$
3321       case TokenNamerequire_once :
3322         return "require_once"; //$NON-NLS-1$
3323       case TokenNamereturn :
3324         return "return"; //$NON-NLS-1$
3325       case TokenNamestatic :
3326         return "static"; //$NON-NLS-1$
3327       case TokenNameswitch :
3328         return "switch"; //$NON-NLS-1$
3329       case TokenNametrue :
3330         return "true"; //$NON-NLS-1$
3331       case TokenNamevar :
3332         return "var"; //$NON-NLS-1$
3333       case TokenNamewhile :
3334         return "while"; //$NON-NLS-1$
3335       case TokenNamethis :
3336         return "$this"; //$NON-NLS-1$
3337       case TokenNameIntegerLiteral :
3338         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3339       case TokenNameDoubleLiteral :
3340         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3341       case TokenNameStringLiteral :
3342         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3343       case TokenNameStringConstant :
3344         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3345       case TokenNameStringInterpolated :
3346         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3347       case TokenNameHEREDOC :
3348         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3349
3350       case TokenNamePLUS_PLUS :
3351         return "++"; //$NON-NLS-1$
3352       case TokenNameMINUS_MINUS :
3353         return "--"; //$NON-NLS-1$
3354       case TokenNameEQUAL_EQUAL :
3355         return "=="; //$NON-NLS-1$
3356       case TokenNameEQUAL_EQUAL_EQUAL :
3357         return "==="; //$NON-NLS-1$
3358       case TokenNameEQUAL_GREATER :
3359         return "=>"; //$NON-NLS-1$
3360       case TokenNameLESS_EQUAL :
3361         return "<="; //$NON-NLS-1$
3362       case TokenNameGREATER_EQUAL :
3363         return ">="; //$NON-NLS-1$
3364       case TokenNameNOT_EQUAL :
3365         return "!="; //$NON-NLS-1$
3366       case TokenNameNOT_EQUAL_EQUAL :
3367         return "!=="; //$NON-NLS-1$
3368       case TokenNameLEFT_SHIFT :
3369         return "<<"; //$NON-NLS-1$
3370       case TokenNameRIGHT_SHIFT :
3371         return ">>"; //$NON-NLS-1$
3372       case TokenNamePLUS_EQUAL :
3373         return "+="; //$NON-NLS-1$
3374       case TokenNameMINUS_EQUAL :
3375         return "-="; //$NON-NLS-1$
3376       case TokenNameMULTIPLY_EQUAL :
3377         return "*="; //$NON-NLS-1$
3378       case TokenNameDIVIDE_EQUAL :
3379         return "/="; //$NON-NLS-1$
3380       case TokenNameAND_EQUAL :
3381         return "&="; //$NON-NLS-1$
3382       case TokenNameOR_EQUAL :
3383         return "|="; //$NON-NLS-1$
3384       case TokenNameXOR_EQUAL :
3385         return "^="; //$NON-NLS-1$
3386       case TokenNameREMAINDER_EQUAL :
3387         return "%="; //$NON-NLS-1$
3388       case TokenNameLEFT_SHIFT_EQUAL :
3389         return "<<="; //$NON-NLS-1$
3390       case TokenNameRIGHT_SHIFT_EQUAL :
3391         return ">>="; //$NON-NLS-1$
3392       case TokenNameOR_OR :
3393         return "||"; //$NON-NLS-1$
3394       case TokenNameAND_AND :
3395         return "&&"; //$NON-NLS-1$
3396       case TokenNamePLUS :
3397         return "+"; //$NON-NLS-1$
3398       case TokenNameMINUS :
3399         return "-"; //$NON-NLS-1$
3400       case TokenNameMINUS_GREATER :
3401         return "->";
3402       case TokenNameNOT :
3403         return "!"; //$NON-NLS-1$
3404       case TokenNameREMAINDER :
3405         return "%"; //$NON-NLS-1$
3406       case TokenNameXOR :
3407         return "^"; //$NON-NLS-1$
3408       case TokenNameAND :
3409         return "&"; //$NON-NLS-1$
3410       case TokenNameMULTIPLY :
3411         return "*"; //$NON-NLS-1$
3412       case TokenNameOR :
3413         return "|"; //$NON-NLS-1$
3414       case TokenNameTWIDDLE :
3415         return "~"; //$NON-NLS-1$
3416       case TokenNameTWIDDLE_EQUAL :
3417         return "~="; //$NON-NLS-1$
3418       case TokenNameDIVIDE :
3419         return "/"; //$NON-NLS-1$
3420       case TokenNameGREATER :
3421         return ">"; //$NON-NLS-1$
3422       case TokenNameLESS :
3423         return "<"; //$NON-NLS-1$
3424       case TokenNameLPAREN :
3425         return "("; //$NON-NLS-1$
3426       case TokenNameRPAREN :
3427         return ")"; //$NON-NLS-1$
3428       case TokenNameLBRACE :
3429         return "{"; //$NON-NLS-1$
3430       case TokenNameRBRACE :
3431         return "}"; //$NON-NLS-1$
3432       case TokenNameLBRACKET :
3433         return "["; //$NON-NLS-1$
3434       case TokenNameRBRACKET :
3435         return "]"; //$NON-NLS-1$
3436       case TokenNameSEMICOLON :
3437         return ";"; //$NON-NLS-1$
3438       case TokenNameQUESTION :
3439         return "?"; //$NON-NLS-1$
3440       case TokenNameCOLON :
3441         return ":"; //$NON-NLS-1$
3442       case TokenNameCOMMA :
3443         return ","; //$NON-NLS-1$
3444       case TokenNameDOT :
3445         return "."; //$NON-NLS-1$
3446       case TokenNameEQUAL :
3447         return "="; //$NON-NLS-1$
3448       case TokenNameAT :
3449         return "@";
3450       case TokenNameDOLLAR_LBRACE :
3451         return "${";
3452       case TokenNameEOF :
3453         return "EOF"; //$NON-NLS-1$
3454       case TokenNameWHITESPACE :
3455         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3456       case TokenNameCOMMENT_LINE :
3457         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3458       case TokenNameCOMMENT_BLOCK :
3459         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3460       case TokenNameCOMMENT_PHPDOC :
3461         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3462       case TokenNameHTML :
3463         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3464       default :
3465         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3466     }
3467   }
3468
/**
 * Creates a scanner with assert mode switched off.
 * Simply forwards to the four-argument constructor with <code>assertMode</code>
 * set to <code>false</code>.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals) {
  this(
    tokenizeComments,
    tokenizeWhiteSpace,
    checkNonExternalizedStringLiterals,
    false /* assertMode */);
}
3472
/**
 * Creates a scanner and stores the four configuration flags in the fields of the same name.
 * <code>eofPosition</code> starts out at <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments stored in <code>this.tokenizeComments</code>
 * @param tokenizeWhiteSpace stored in <code>this.tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals stored in <code>this.checkNonExternalizedStringLiterals</code>
 * @param assertMode stored in <code>this.assertMode</code>
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {

  // configuration flags, copied verbatim from the arguments
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // initial end-of-input position
  this.eofPosition = Integer.MAX_VALUE;
}
3484
3485   private void checkNonExternalizeString() throws InvalidInputException {
3486     if (currentLine == null)
3487       return;
3488     parseTags(currentLine);
3489   }
3490
3491   private void parseTags(NLSLine line) throws InvalidInputException {
3492     String s = new String(getCurrentTokenSource());
3493     int pos = s.indexOf(TAG_PREFIX);
3494     int lineLength = line.size();
3495     while (pos != -1) {
3496       int start = pos + TAG_PREFIX_LENGTH;
3497       int end = s.indexOf(TAG_POSTFIX, start);
3498       String index = s.substring(start, end);
3499       int i = 0;
3500       try {
3501         i = Integer.parseInt(index) - 1;
3502         // Tags are one based not zero based.
3503       } catch (NumberFormatException e) {
3504         i = -1; // we don't want to consider this as a valid NLS tag
3505       }
3506       if (line.exists(i)) {
3507         line.set(i, null);
3508       }
3509       pos = s.indexOf(TAG_PREFIX, start);
3510     }
3511
3512     this.nonNLSStrings = new StringLiteral[lineLength];
3513     int nonNLSCounter = 0;
3514     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3515       StringLiteral literal = (StringLiteral) iterator.next();
3516       if (literal != null) {
3517         this.nonNLSStrings[nonNLSCounter++] = literal;
3518       }
3519     }
3520     if (nonNLSCounter == 0) {
3521       this.nonNLSStrings = null;
3522       currentLine = null;
3523       return;
3524     }
3525     this.wasNonExternalizedStringLiteral = true;
3526     if (nonNLSCounter != lineLength) {
3527       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3528     }
3529     currentLine = null;
3530   }
3531
3532
3533         public final void scanEscapeCharacter() throws InvalidInputException {
3534                 // the string with "\\u" is a legal string of two chars \ and u
3535                 //thus we use a direct access to the source (for regular cases).
3536
3537                 if (unicodeAsBackSlash) {
3538                         // consume next character
3539                         unicodeAsBackSlash = false;
3540 //                      if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3541 //                              getNextUnicodeChar();
3542 //                      } else {
3543                                 if (withoutUnicodePtr != 0) {
3544                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3545 //                              }
3546                         }
3547                 } else
3548                         currentCharacter = source[currentPosition++];
3549                 switch (currentCharacter) {
3550                         case 'b' :
3551                                 currentCharacter = '\b';
3552                                 break;
3553                         case 't' :
3554                                 currentCharacter = '\t';
3555                                 break;
3556                         case 'n' :
3557                                 currentCharacter = '\n';
3558                                 break;
3559                         case 'f' :
3560                                 currentCharacter = '\f';
3561                                 break;
3562                         case 'r' :
3563                                 currentCharacter = '\r';
3564                                 break;
3565                         case '\"' :
3566                                 currentCharacter = '\"';
3567                                 break;
3568                         case '\'' :
3569                                 currentCharacter = '\'';
3570                                 break;
3571                         case '\\' :
3572                                 currentCharacter = '\\';
3573                                 break;
3574                         default :
3575                                 // -----------octal escape--------------
3576                                 // OctalDigit
3577                                 // OctalDigit OctalDigit
3578                                 // ZeroToThree OctalDigit OctalDigit
3579
3580                                 int number = Character.getNumericValue(currentCharacter);
3581                                 if (number >= 0 && number <= 7) {
3582                                         boolean zeroToThreeNot = number > 3;
3583                                         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3584                                                 int digit = Character.getNumericValue(currentCharacter);
3585                                                 if (digit >= 0 && digit <= 7) {
3586                                                         number = (number * 8) + digit;
3587                                                         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3588                                                                 if (zeroToThreeNot) {// has read \NotZeroToThree OctalDigit Digit --> ignore last character
3589                                                                         currentPosition--;
3590                                                                 } else {
3591                                                                         digit = Character.getNumericValue(currentCharacter);
3592                                                                         if (digit >= 0 && digit <= 7){ // has read \ZeroToThree OctalDigit OctalDigit
3593                                                                                 number = (number * 8) + digit;
3594                                                                         } else {// has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3595                                                                                 currentPosition--;
3596                                                                         }
3597                                                                 }
3598                                                         } else { // has read \OctalDigit NonDigit--> ignore last character
3599                                                                 currentPosition--;
3600                                                         }
3601                                                 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3602                                                         currentPosition--;
3603                                                 }
3604                                         } else { // has read \OctalDigit --> ignore last character
3605                                                 currentPosition--;
3606                                         }
3607                                         if (number > 255)
3608                                                 throw new InvalidInputException(INVALID_ESCAPE);
3609                                         currentCharacter = (char) number;
3610                                 } else
3611                                         throw new InvalidInputException(INVALID_ESCAPE);
3612                 }
3613         }
3614
3615 // chech presence of task: tags
3616 public void checkTaskTag(int commentStart, int commentEnd) {
3617
3618         // only look for newer task: tags
3619         if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount-1][0] >= commentStart) {
3620                 return;
3621         }
3622         int foundTaskIndex = this.foundTaskCount;
3623         nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3624
3625                 char[] tag = null;
3626                 char[] priority = null;
3627
3628                 // check for tag occurrence
3629                 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++){
3630                         tag = this.taskTags[itag];
3631                         priority =
3632                                 this.taskPriorities != null && itag < this.taskPriorities.length ?
3633                                 this.taskPriorities[itag] :
3634                                 null;
3635                         int tagLength = tag.length;
3636                         for (int t = 0; t < tagLength; t++){
3637                                 if (this.source[i+t] != tag[t]) continue nextTag;
3638                         }
3639
3640                         if (this.foundTaskTags == null){
3641                                 this.foundTaskTags = new char[5][];
3642                                 this.foundTaskMessages = new char[5][];
3643                                 this.foundTaskPriorities = new char[5][];
3644                                 this.foundTaskPositions = new int[5][];
3645                         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3646                                 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3647                                 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3648                                 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3649                                 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount*2][], 0, this.foundTaskCount);
3650                         }
3651                         this.foundTaskTags[this.foundTaskCount] = tag;
3652                         this.foundTaskPriorities[this.foundTaskCount] = priority;
3653                         this.foundTaskPositions[this.foundTaskCount] = new int[]{ i, i+tagLength-1 };
3654                         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3655                         this.foundTaskCount++;
3656
3657                         i += tagLength-1; // will be incremented when looping
3658                 }
3659         }
3660
3661         for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3662                 // retrieve message start and end positions
3663                 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3664                 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd-1; // at most beginning of next task
3665                 if (max_value < msgStart) max_value = msgStart; // would only occur if tag is before EOF.
3666                 int end = -1;
3667                 char c;
3668
3669                 for (int j = msgStart; j < max_value; j++){
3670                         if ((c = this.source[j]) == '\n' || c == '\r'){
3671                                 end = j-1;
3672                                 break;
3673                         }
3674                 }
3675
3676                 if (end == -1){
3677                         for (int j = max_value; j > msgStart; j--){
3678                                 if ((c = this.source[j]) == '*') {
3679                                         end = j-1;
3680                                         break;
3681                                 }
3682                         }
3683                         if (end == -1) end = max_value;
3684                 }
3685
3686                 if (msgStart == end) continue; // empty
3687
3688                 // trim the message
3689                 while (CharOperation.isWhitespace(source[end]) && msgStart <= end) end--;
3690                 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end) msgStart++;
3691
3692                 // update the end position of the task
3693                 this.foundTaskPositions[i][1] = end;
3694
3695                 // get the message source
3696                 final int messageLength = end-msgStart+1;
3697                 char[] message = new char[messageLength];
3698
3699                 System.arraycopy(source, msgStart, message, 0, messageLength);
3700                 this.foundTaskMessages[i] = message;
3701         }
3702 }
3703
3704 }