net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
  22   /* APIs are
  23    - getNextToken() which returns the current type of the token
  24      (this value is not memorized by the scanner)
  25    - getCurrentTokenSource() which provides the token "REAL" source
  26      (aka all unicode escapes have been transformed into a correct char)
  27    - sourceStart gives the position into the stream
  28    - currentPosition-1 gives the sourceEnd position into the stream
  29   */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
  50   //of a entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  91   //----------------optimized identifier management------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray =
 120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 121   static final int TableSize = 30, InternalTableSize = 6;
 122   //30*6 = 180 entries
 123   public static final int OptimizedLength = 6;
 124   public /*static*/
 125   final char[][][][] charArray_length =
 126     new char[OptimizedLength][TableSize][InternalTableSize][];
 127   // support for detecting non-externalized string literals
 128   int currentLineNr = -1;
 129   int previousLineNr = -1;
 130   NLSLine currentLine = null;
 131   List lines = new ArrayList();
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 136   public StringLiteral[] nonNLSStrings = null;
 137   public boolean checkNonExternalizedStringLiterals = true;
 138   public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
 149   static int newEntry2 = 0,
 150     newEntry3 = 0,
 151     newEntry4 = 0,
 152     newEntry5 = 0,
 153     newEntry6 = 0;
 154
 155   public static final int RoundBracket = 0;
 156   public static final int SquareBracket = 1;
 157   public static final int CurlyBracket = 2;
 158   public static final int BracketKinds = 3;
 159
 160   public static final boolean DEBUG = false;
 161   public Scanner() {
 162     this(false, false);
 163   }
 164   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 165     this(tokenizeComments, tokenizeWhiteSpace, false);
 166   }
 167
 168   /**
 169    * Determines if the specified character is
 170    * permissible as the first character in a PHP identifier
 171    */
 172   public static boolean isPHPIdentifierStart(char ch) {
 173     return Character.isLetter(ch)
 174       || (ch == '_')
 175       || (0x7F <= ch && ch <= 0xFF);
 176   }
 177
 178   /**
 179    * Determines if the specified character may be part of a PHP identifier as
 180    * other than the first character
 181    */
 182   public static boolean isPHPIdentifierPart(char ch) {
 183     return Character.isLetterOrDigit(ch)
 184       || (ch == '_')
 185       || (0x7F <= ch && ch <= 0xFF);
 186   }
 187
 188   public final boolean atEnd() {
 189     // This code is not relevant if source is
 190     // Only a part of the real stream input
 191
 192     return source.length == currentPosition;
 193   }
 194   public char[] getCurrentIdentifierSource() {
 195     //return the token REAL source (aka unicodes are precomputed)
 196
 197     char[] result;
 198     if (withoutUnicodePtr != 0)
 199       //0 is used as a fast test flag so the real first char is in position 1
 200       System.arraycopy(
 201         withoutUnicodeBuffer,
 202         1,
 203         result = new char[withoutUnicodePtr],
 204         0,
 205         withoutUnicodePtr);
 206     else {
 207       int length = currentPosition - startPosition;
 208       switch (length) { // see OptimizedLength
 209         case 1 :
 210           return optimizedCurrentTokenSource1();
 211         case 2 :
 212           return optimizedCurrentTokenSource2();
 213         case 3 :
 214           return optimizedCurrentTokenSource3();
 215         case 4 :
 216           return optimizedCurrentTokenSource4();
 217         case 5 :
 218           return optimizedCurrentTokenSource5();
 219         case 6 :
 220           return optimizedCurrentTokenSource6();
 221       }
 222       //no optimization
 223       System.arraycopy(
 224         source,
 225         startPosition,
 226         result = new char[length],
 227         0,
 228         length);
 229     }
 230     return result;
 231   }
 232   public int getCurrentTokenEndPosition() {
 233     return this.currentPosition - 1;
 234   }
 235   public final char[] getCurrentTokenSource() {
 236     // Return the token REAL source (aka unicodes are precomputed)
 237
 238     char[] result;
 239     if (withoutUnicodePtr != 0)
 240       // 0 is used as a fast test flag so the real first char is in position 1
 241       System.arraycopy(
 242         withoutUnicodeBuffer,
 243         1,
 244         result = new char[withoutUnicodePtr],
 245         0,
 246         withoutUnicodePtr);
 247     else {
 248       int length;
 249       System.arraycopy(
 250         source,
 251         startPosition,
 252         result = new char[length = currentPosition - startPosition],
 253         0,
 254         length);
 255     }
 256     return result;
 257   }
 258
 259   public final char[] getCurrentTokenSource(int startPos) {
 260     // Return the token REAL source (aka unicodes are precomputed)
 261
 262     char[] result;
 263     if (withoutUnicodePtr != 0)
 264       // 0 is used as a fast test flag so the real first char is in position 1
 265       System.arraycopy(
 266         withoutUnicodeBuffer,
 267         1,
 268         result = new char[withoutUnicodePtr],
 269         0,
 270         withoutUnicodePtr);
 271     else {
 272       int length;
 273       System.arraycopy(
 274         source,
 275         startPos,
 276         result = new char[length = currentPosition - startPos],
 277         0,
 278         length);
 279     }
 280     return result;
 281   }
 282
 283   public final char[] getCurrentTokenSourceString() {
 284     //return the token REAL source (aka unicodes are precomputed).
 285     //REMOVE the two " that are at the beginning and the end.
 286
 287     char[] result;
 288     if (withoutUnicodePtr != 0)
 289       //0 is used as a fast test flag so the real first char is in position 1
 290       System.arraycopy(withoutUnicodeBuffer, 2,
 291       //2 is 1 (real start) + 1 (to jump over the ")
 292       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 293     else {
 294       int length;
 295       System.arraycopy(
 296         source,
 297         startPosition + 1,
 298         result = new char[length = currentPosition - startPosition - 2],
 299         0,
 300         length);
 301     }
 302     return result;
 303   }
 304   public int getCurrentTokenStartPosition() {
 305     return this.startPosition;
 306   }
 307   /*
 308    * Search the source position corresponding to the end of a given line number
 309    *
 310    * Line numbers are 1-based, and relative to the scanner initialPosition.
 311    * Character positions are 0-based.
 312    *
 313    * In case the given line number is inconsistent, answers -1.
 314    */
 315   public final int getLineEnd(int lineNumber) {
 316
 317     if (lineEnds == null)
 318       return -1;
 319     if (lineNumber >= lineEnds.length)
 320       return -1;
 321     if (lineNumber <= 0)
 322       return -1;
 323
 324     if (lineNumber == lineEnds.length - 1)
 325       return eofPosition;
 326     return lineEnds[lineNumber - 1];
 327     // next line start one character behind the lineEnd of the previous line
 328   }
 329   /**
 330    * Search the source position corresponding to the beginning of a given line number
 331    *
 332    * Line numbers are 1-based, and relative to the scanner initialPosition.
 333    * Character positions are 0-based.
 334    *
 335    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 336    *
 337    * In case the given line number is inconsistent, answers -1.
 338    */
 339   public final int getLineStart(int lineNumber) {
 340
 341     if (lineEnds == null)
 342       return -1;
 343     if (lineNumber >= lineEnds.length)
 344       return -1;
 345     if (lineNumber <= 0)
 346       return -1;
 347
 348     if (lineNumber == 1)
 349       return initialPosition;
 350     return lineEnds[lineNumber - 2] + 1;
 351     // next line start one character behind the lineEnd of the previous line
 352   }
 353   public final boolean getNextChar(char testedChar) {
 354     //BOOLEAN
 355     //handle the case of unicode.
 356     //when a unicode appears then we must use a buffer that holds char internal values
 357     //At the end of this method currentCharacter holds the new visited char
 358     //and currentPosition points right next after it
 359     //Both previous lines are true if the currentCharacter is == to the testedChar
 360     //On false, no side effect has occured.
 361
 362     //ALL getNextChar.... ARE OPTIMIZED COPIES
 363
 364     int temp = currentPosition;
 365     try {
 366       if (((currentCharacter = source[currentPosition++]) == '\\')
 367         && (source[currentPosition] == 'u')) {
 368         //-------------unicode traitement ------------
 369         int c1, c2, c3, c4;
 370         int unicodeSize = 6;
 371         currentPosition++;
 372         while (source[currentPosition] == 'u') {
 373           currentPosition++;
 374           unicodeSize++;
 375         }
 376
 377         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 378           || c1 < 0)
 379           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 380             || c2 < 0)
 381           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 382             || c3 < 0)
 383           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 384             || c4 < 0)) {
 385           currentPosition = temp;
 386           return false;
 387         }
 388
 389         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 390         if (currentCharacter != testedChar) {
 391           currentPosition = temp;
 392           return false;
 393         }
 394         unicodeAsBackSlash = currentCharacter == '\\';
 395
 396         //need the unicode buffer
 397         if (withoutUnicodePtr == 0) {
 398           //buffer all the entries that have been left aside....
 399           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 400           System.arraycopy(
 401             source,
 402             startPosition,
 403             withoutUnicodeBuffer,
 404             1,
 405             withoutUnicodePtr);
 406         }
 407         //fill the buffer with the char
 408         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 409         return true;
 410
 411       } //-------------end unicode traitement--------------
 412       else {
 413         if (currentCharacter != testedChar) {
 414           currentPosition = temp;
 415           return false;
 416         }
 417         unicodeAsBackSlash = false;
 418         if (withoutUnicodePtr != 0)
 419           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 420         return true;
 421       }
 422     } catch (IndexOutOfBoundsException e) {
 423       unicodeAsBackSlash = false;
 424       currentPosition = temp;
 425       return false;
 426     }
 427   }
 428   public final int getNextChar(char testedChar1, char testedChar2) {
 429     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 430     //test can be done with (x==0) for the first and (x>0) for the second
 431     //handle the case of unicode.
 432     //when a unicode appears then we must use a buffer that holds char internal values
 433     //At the end of this method currentCharacter holds the new visited char
 434     //and currentPosition points right next after it
 435     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 436     //On false, no side effect has occured.
 437
 438     //ALL getNextChar.... ARE OPTIMIZED COPIES
 439
 440     int temp = currentPosition;
 441     try {
 442       int result;
 443       if (((currentCharacter = source[currentPosition++]) == '\\')
 444         && (source[currentPosition] == 'u')) {
 445         //-------------unicode traitement ------------
 446         int c1, c2, c3, c4;
 447         int unicodeSize = 6;
 448         currentPosition++;
 449         while (source[currentPosition] == 'u') {
 450           currentPosition++;
 451           unicodeSize++;
 452         }
 453
 454         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 455           || c1 < 0)
 456           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 457             || c2 < 0)
 458           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 459             || c3 < 0)
 460           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 461             || c4 < 0)) {
 462           currentPosition = temp;
 463           return 2;
 464         }
 465
 466         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 467         if (currentCharacter == testedChar1)
 468           result = 0;
 469         else if (currentCharacter == testedChar2)
 470           result = 1;
 471         else {
 472           currentPosition = temp;
 473           return -1;
 474         }
 475
 476         //need the unicode buffer
 477         if (withoutUnicodePtr == 0) {
 478           //buffer all the entries that have been left aside....
 479           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 480           System.arraycopy(
 481             source,
 482             startPosition,
 483             withoutUnicodeBuffer,
 484             1,
 485             withoutUnicodePtr);
 486         }
 487         //fill the buffer with the char
 488         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 489         return result;
 490       } //-------------end unicode traitement--------------
 491       else {
 492         if (currentCharacter == testedChar1)
 493           result = 0;
 494         else if (currentCharacter == testedChar2)
 495           result = 1;
 496         else {
 497           currentPosition = temp;
 498           return -1;
 499         }
 500
 501         if (withoutUnicodePtr != 0)
 502           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 503         return result;
 504       }
 505     } catch (IndexOutOfBoundsException e) {
 506       currentPosition = temp;
 507       return -1;
 508     }
 509   }
 510   public final boolean getNextCharAsDigit() {
 511     //BOOLEAN
 512     //handle the case of unicode.
 513     //when a unicode appears then we must use a buffer that holds char internal values
 514     //At the end of this method currentCharacter holds the new visited char
 515     //and currentPosition points right next after it
 516     //Both previous lines are true if the currentCharacter is a digit
 517     //On false, no side effect has occured.
 518
 519     //ALL getNextChar.... ARE OPTIMIZED COPIES
 520
 521     int temp = currentPosition;
 522     try {
 523       if (((currentCharacter = source[currentPosition++]) == '\\')
 524         && (source[currentPosition] == 'u')) {
 525         //-------------unicode traitement ------------
 526         int c1, c2, c3, c4;
 527         int unicodeSize = 6;
 528         currentPosition++;
 529         while (source[currentPosition] == 'u') {
 530           currentPosition++;
 531           unicodeSize++;
 532         }
 533
 534         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 535           || c1 < 0)
 536           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 537             || c2 < 0)
 538           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 539             || c3 < 0)
 540           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 541             || c4 < 0)) {
 542           currentPosition = temp;
 543           return false;
 544         }
 545
 546         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 547         if (!Character.isDigit(currentCharacter)) {
 548           currentPosition = temp;
 549           return false;
 550         }
 551
 552         //need the unicode buffer
 553         if (withoutUnicodePtr == 0) {
 554           //buffer all the entries that have been left aside....
 555           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 556           System.arraycopy(
 557             source,
 558             startPosition,
 559             withoutUnicodeBuffer,
 560             1,
 561             withoutUnicodePtr);
 562         }
 563         //fill the buffer with the char
 564         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 565         return true;
 566       } //-------------end unicode traitement--------------
 567       else {
 568         if (!Character.isDigit(currentCharacter)) {
 569           currentPosition = temp;
 570           return false;
 571         }
 572         if (withoutUnicodePtr != 0)
 573           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 574         return true;
 575       }
 576     } catch (IndexOutOfBoundsException e) {
 577       currentPosition = temp;
 578       return false;
 579     }
 580   }
 581   public final boolean getNextCharAsDigit(int radix) {
 582     //BOOLEAN
 583     //handle the case of unicode.
 584     //when a unicode appears then we must use a buffer that holds char internal values
 585     //At the end of this method currentCharacter holds the new visited char
 586     //and currentPosition points right next after it
 587     //Both previous lines are true if the currentCharacter is a digit base on radix
 588     //On false, no side effect has occured.
 589
 590     //ALL getNextChar.... ARE OPTIMIZED COPIES
 591
 592     int temp = currentPosition;
 593     try {
 594       if (((currentCharacter = source[currentPosition++]) == '\\')
 595         && (source[currentPosition] == 'u')) {
 596         //-------------unicode traitement ------------
 597         int c1, c2, c3, c4;
 598         int unicodeSize = 6;
 599         currentPosition++;
 600         while (source[currentPosition] == 'u') {
 601           currentPosition++;
 602           unicodeSize++;
 603         }
 604
 605         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 606           || c1 < 0)
 607           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 608             || c2 < 0)
 609           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 610             || c3 < 0)
 611           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 612             || c4 < 0)) {
 613           currentPosition = temp;
 614           return false;
 615         }
 616
 617         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 618         if (Character.digit(currentCharacter, radix) == -1) {
 619           currentPosition = temp;
 620           return false;
 621         }
 622
 623         //need the unicode buffer
 624         if (withoutUnicodePtr == 0) {
 625           //buffer all the entries that have been left aside....
 626           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 627           System.arraycopy(
 628             source,
 629             startPosition,
 630             withoutUnicodeBuffer,
 631             1,
 632             withoutUnicodePtr);
 633         }
 634         //fill the buffer with the char
 635         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 636         return true;
 637       } //-------------end unicode traitement--------------
 638       else {
 639         if (Character.digit(currentCharacter, radix) == -1) {
 640           currentPosition = temp;
 641           return false;
 642         }
 643         if (withoutUnicodePtr != 0)
 644           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 645         return true;
 646       }
 647     } catch (IndexOutOfBoundsException e) {
 648       currentPosition = temp;
 649       return false;
 650     }
 651   }
 652   public boolean getNextCharAsJavaIdentifierPart() {
 653     //BOOLEAN
 654     //handle the case of unicode.
 655     //when a unicode appears then we must use a buffer that holds char internal values
 656     //At the end of this method currentCharacter holds the new visited char
 657     //and currentPosition points right next after it
 658     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 659     //On false, no side effect has occured.
 660
 661     //ALL getNextChar.... ARE OPTIMIZED COPIES
 662
 663     int temp = currentPosition;
 664     try {
 665       if (((currentCharacter = source[currentPosition++]) == '\\')
 666         && (source[currentPosition] == 'u')) {
 667         //-------------unicode traitement ------------
 668         int c1, c2, c3, c4;
 669         int unicodeSize = 6;
 670         currentPosition++;
 671         while (source[currentPosition] == 'u') {
 672           currentPosition++;
 673           unicodeSize++;
 674         }
 675
 676         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 677           || c1 < 0)
 678           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 679             || c2 < 0)
 680           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 681             || c3 < 0)
 682           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 683             || c4 < 0)) {
 684           currentPosition = temp;
 685           return false;
 686         }
 687
 688         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 689         if (!isPHPIdentifierPart(currentCharacter)) {
 690           currentPosition = temp;
 691           return false;
 692         }
 693
 694         //need the unicode buffer
 695         if (withoutUnicodePtr == 0) {
 696           //buffer all the entries that have been left aside....
 697           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 698           System.arraycopy(
 699             source,
 700             startPosition,
 701             withoutUnicodeBuffer,
 702             1,
 703             withoutUnicodePtr);
 704         }
 705         //fill the buffer with the char
 706         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 707         return true;
 708       } //-------------end unicode traitement--------------
 709       else {
 710         if (!isPHPIdentifierPart(currentCharacter)) {
 711           currentPosition = temp;
 712           return false;
 713         }
 714
 715         if (withoutUnicodePtr != 0)
 716           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 717         return true;
 718       }
 719     } catch (IndexOutOfBoundsException e) {
 720       currentPosition = temp;
 721       return false;
 722     }
 723   }
 724
 725   public int getNextToken() throws InvalidInputException {
 726     int htmlPosition = currentPosition;
 727     try {
 728       while (!phpMode) {
 729         currentCharacter = source[currentPosition++];
 730         if (currentCharacter == '<') {
 731           if (getNextChar('?')) {
 732             currentCharacter = source[currentPosition++];
 733             if ((currentCharacter == ' ')
 734               || Character.isWhitespace(currentCharacter)) {
 735               // <?
 736               startPosition = currentPosition;
 737               phpMode = true;
 738               if (tokenizeWhiteSpace) {
 739                 // && (whiteStart != currentPosition - 1)) {
 740                 // reposition scanner in case we are interested by spaces as tokens
 741                 startPosition = htmlPosition;
 742                 return TokenNameHTML;
 743               }
 744             } else {
 745               boolean phpStart =
 746                 (currentCharacter == 'P') || (currentCharacter == 'p');
 747               if (phpStart) {
 748                 int test = getNextChar('H', 'h');
 749                 if (test >= 0) {
 750                   test = getNextChar('P', 'p');
 751                   if (test >= 0) {
 752                     // <?PHP  <?php
 753                     startPosition = currentPosition;
 754                     phpMode = true;
 755
 756                     if (tokenizeWhiteSpace) {
 757                       // && (whiteStart != currentPosition - 1)) {
 758                       // reposition scanner in case we are interested by spaces as tokens
 759                       startPosition = htmlPosition;
 760                       return TokenNameHTML;
 761                     }
 762                   }
 763                 }
 764               }
 765             }
 766           }
 767         }
 768
 769         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 770           if (recordLineSeparator) {
 771             pushLineSeparator();
 772           } else {
 773             currentLine = null;
 774           }
 775         }
 776       }
 777     } //-----------------end switch while try--------------------
 778     catch (IndexOutOfBoundsException e) {
 779       if (tokenizeWhiteSpace) {
 780         // && (whiteStart != currentPosition - 1)) {
 781         // reposition scanner in case we are interested by spaces as tokens
 782         startPosition = htmlPosition;
 783       }
 784       return TokenNameEOF;
 785     }
 786
 787     if (phpMode) {
 788       this.wasAcr = false;
 789       if (diet) {
 790         jumpOverMethodBody();
 791         diet = false;
 792         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 793       }
 794       try {
 795         while (true) { //loop for jumping over comments
 796           withoutUnicodePtr = 0;
 797           //start with a new token (even comment written with unicode )
 798
 799           // ---------Consume white space and handles startPosition---------
 800           int whiteStart = currentPosition;
 801           boolean isWhiteSpace;
 802           do {
 803             startPosition = currentPosition;
 804             if (((currentCharacter = source[currentPosition++]) == '\\')
 805               && (source[currentPosition] == 'u')) {
 806               isWhiteSpace = jumpOverUnicodeWhiteSpace();
 807             } else {
 808               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 809                 checkNonExternalizeString();
 810                 if (recordLineSeparator) {
 811                   pushLineSeparator();
 812                 } else {
 813                   currentLine = null;
 814                 }
 815               }
 816               isWhiteSpace =
 817                 (currentCharacter == ' ')
 818                   || Character.isWhitespace(currentCharacter);
 819             }
 820           } while (isWhiteSpace);
 821           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 822             // reposition scanner in case we are interested by spaces as tokens
 823             currentPosition--;
 824             startPosition = whiteStart;
 825             return TokenNameWHITESPACE;
 826           }
 827           //little trick to get out in the middle of a source computation
 828           if (currentPosition > eofPosition)
 829             return TokenNameEOF;
 830
 831           // ---------Identify the next token-------------
 832
 833           switch (currentCharacter) {
 834             case '(' :
 835               return TokenNameLPAREN;
 836             case ')' :
 837               return TokenNameRPAREN;
 838             case '{' :
 839               return TokenNameLBRACE;
 840             case '}' :
 841               return TokenNameRBRACE;
 842             case '[' :
 843               return TokenNameLBRACKET;
 844             case ']' :
 845               return TokenNameRBRACKET;
 846             case ';' :
 847               return TokenNameSEMICOLON;
 848             case ',' :
 849               return TokenNameCOMMA;
 850
 851             case '.' :
 852               if (getNextCharAsDigit())
 853                 return scanNumber(true);
 854               return TokenNameDOT;
 855             case '+' :
 856               {
 857                 int test;
 858                 if ((test = getNextChar('+', '=')) == 0)
 859                   return TokenNamePLUS_PLUS;
 860                 if (test > 0)
 861                   return TokenNamePLUS_EQUAL;
 862                 return TokenNamePLUS;
 863               }
 864             case '-' :
 865               {
 866                 int test;
 867                 if ((test = getNextChar('-', '=')) == 0)
 868                   return TokenNameMINUS_MINUS;
 869                 if (test > 0)
 870                   return TokenNameMINUS_EQUAL;
 871                 if (getNextChar('>'))
 872                   return TokenNameMINUS_GREATER;
 873
 874                 return TokenNameMINUS;
 875               }
 876             case '~' :
 877               if (getNextChar('='))
 878                 return TokenNameTWIDDLE_EQUAL;
 879               return TokenNameTWIDDLE;
 880             case '!' :
 881               if (getNextChar('='))
 882                 return TokenNameNOT_EQUAL;
 883               return TokenNameNOT;
 884             case '*' :
 885               if (getNextChar('='))
 886                 return TokenNameMULTIPLY_EQUAL;
 887               return TokenNameMULTIPLY;
 888             case '%' :
 889               if (getNextChar('='))
 890                 return TokenNameREMAINDER_EQUAL;
 891               return TokenNameREMAINDER;
 892             case '<' :
 893               {
 894                 int test;
 895                 if ((test = getNextChar('=', '<')) == 0)
 896                   return TokenNameLESS_EQUAL;
 897                 if (test > 0) {
 898                   if (getNextChar('='))
 899                     return TokenNameLEFT_SHIFT_EQUAL;
 900                   if (getNextChar('<')) {
 901                     int heredocStart = currentPosition;
 902                     int heredocLength = 0;
 903                     currentCharacter = source[currentPosition++];
 904                     if (isPHPIdentifierStart(currentCharacter)) {
 905                       currentCharacter = source[currentPosition++];
 906                     } else {
 907                       return TokenNameERROR;
 908                     }
 909                     while (isPHPIdentifierPart(currentCharacter)) {
 910                       currentCharacter = source[currentPosition++];
 911                     }
 912
 913                     heredocLength = currentPosition - heredocStart - 1;
 914
 915                     // heredoc end-tag determination
 916                     boolean endTag = true;
 917                     char ch;
 918                     do {
 919                       ch = source[currentPosition++];
 920                       if (ch == '\r' || ch == '\n') {
 921                         if (recordLineSeparator) {
 922                           pushLineSeparator();
 923                         } else {
 924                           currentLine = null;
 925                         }
 926                         for (int i = 0; i < heredocLength; i++) {
 927                           if (source[currentPosition + i]
 928                             != source[heredocStart + i]) {
 929                             endTag = false;
 930                             break;
 931                           }
 932                         }
 933                         if (endTag) {
 934                           currentPosition += heredocLength - 1;
 935                           currentCharacter = source[currentPosition++];
 936                           break; // do...while loop
 937                         } else {
 938                           endTag = true;
 939                         }
 940                       }
 941
 942                     } while (true);
 943
 944                     return TokenNameHEREDOC;
 945                   }
 946                   return TokenNameLEFT_SHIFT;
 947                 }
 948                 return TokenNameLESS;
 949               }
 950             case '>' :
 951               {
 952                 int test;
 953                 if ((test = getNextChar('=', '>')) == 0)
 954                   return TokenNameGREATER_EQUAL;
 955                 if (test > 0) {
 956                   if ((test = getNextChar('=', '>')) == 0)
 957                     return TokenNameRIGHT_SHIFT_EQUAL;
 958                   return TokenNameRIGHT_SHIFT;
 959                 }
 960                 return TokenNameGREATER;
 961               }
 962             case '=' :
 963               if (getNextChar('='))
 964                 return TokenNameEQUAL_EQUAL;
 965               if (getNextChar('>'))
 966                 return TokenNameEQUAL_GREATER;
 967               return TokenNameEQUAL;
 968             case '&' :
 969               {
 970                 int test;
 971                 if ((test = getNextChar('&', '=')) == 0)
 972                   return TokenNameAND_AND;
 973                 if (test > 0)
 974                   return TokenNameAND_EQUAL;
 975                 return TokenNameAND;
 976               }
 977             case '|' :
 978               {
 979                 int test;
 980                 if ((test = getNextChar('|', '=')) == 0)
 981                   return TokenNameOR_OR;
 982                 if (test > 0)
 983                   return TokenNameOR_EQUAL;
 984                 return TokenNameOR;
 985               }
 986             case '^' :
 987               if (getNextChar('='))
 988                 return TokenNameXOR_EQUAL;
 989               return TokenNameXOR;
 990             case '?' :
 991               if (getNextChar('>')) {
 992                 phpMode = false;
 993                 return TokenNameStopPHP;
 994               }
 995               return TokenNameQUESTION;
 996             case ':' :
 997               if (getNextChar(':'))
 998                 return TokenNameCOLON_COLON;
 999               return TokenNameCOLON;
1000             case '@' :
1001               return TokenNameAT;
1002               //                                        case '\'' :
1003               //                                                {
1004               //                                                        int test;
1005               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1006               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1007               //                                                        }
1008               //                                                        if (test > 0) {
1009               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1010               //                                                                for (int lookAhead = 0;
1011               //                                                                        lookAhead < 3;
1012               //                                                                        lookAhead++) {
1013               //                                                                        if (currentPosition + lookAhead
1014               //                                                                                == source.length)
1015               //                                                                                break;
1016               //                                                                        if (source[currentPosition + lookAhead]
1017               //                                                                                == '\n')
1018               //                                                                                break;
1019               //                                                                        if (source[currentPosition + lookAhead]
1020               //                                                                                == '\'') {
1021               //                                                                                currentPosition += lookAhead + 1;
1022               //                                                                                break;
1023               //                                                                        }
1024               //                                                                }
1025               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1026               //                                                        }
1027               //                                                }
1028               //                                                if (getNextChar('\'')) {
1029               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1030               //                                                        for (int lookAhead = 0;
1031               //                                                                lookAhead < 3;
1032               //                                                                lookAhead++) {
1033               //                                                                if (currentPosition + lookAhead
1034               //                                                                        == source.length)
1035               //                                                                        break;
1036               //                                                                if (source[currentPosition + lookAhead]
1037               //                                                                        == '\n')
1038               //                                                                        break;
1039               //                                                                if (source[currentPosition + lookAhead]
1040               //                                                                        == '\'') {
1041               //                                                                        currentPosition += lookAhead + 1;
1042               //                                                                        break;
1043               //                                                                }
1044               //                                                        }
1045               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1046               //                                                }
1047               //                                                if (getNextChar('\\'))
1048               //                                                        scanEscapeCharacter();
1049               //                                                else { // consume next character
1050               //                                                        unicodeAsBackSlash = false;
1051               //                                                        if (((currentCharacter = source[currentPosition++])
1052               //                                                                == '\\')
1053               //                                                                && (source[currentPosition] == 'u')) {
1054               //                                                                getNextUnicodeChar();
1055               //                                                        } else {
1056               //                                                                if (withoutUnicodePtr != 0) {
1057               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1058               //                                                                                currentCharacter;
1059               //                                                                }
1060               //                                                        }
1061               //                                                }
1062               //                                                //            if (getNextChar('\''))
1063               //                                                //              return TokenNameCharacterLiteral;
1064               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1065               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1066               //                                                        if (currentPosition + lookAhead == source.length)
1067               //                                                                break;
1068               //                                                        if (source[currentPosition + lookAhead] == '\n')
1069               //                                                                break;
1070               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1071               //                                                                currentPosition += lookAhead + 1;
1072               //                                                                break;
1073               //                                                        }
1074               //                                                }
1075               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1076             case '\'' :
1077               try {
1078                 // consume next character
1079                 unicodeAsBackSlash = false;
1080                 if (((currentCharacter = source[currentPosition++]) == '\\')
1081                   && (source[currentPosition] == 'u')) {
1082                   getNextUnicodeChar();
1083                 } else {
1084                   if (withoutUnicodePtr != 0) {
1085                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1086                       currentCharacter;
1087                   }
1088                 }
1089
1090                 while (currentCharacter != '\'') {
1091
1092                   /**** in PHP \r and \n are valid in string literals ****/
1093                   //                  if ((currentCharacter == '\n')
1094                   //                    || (currentCharacter == '\r')) {
1095                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1096                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1097                   //                      if (currentPosition + lookAhead == source.length)
1098                   //                        break;
1099                   //                      if (source[currentPosition + lookAhead] == '\n')
1100                   //                        break;
1101                   //                      if (source[currentPosition + lookAhead] == '\"') {
1102                   //                        currentPosition += lookAhead + 1;
1103                   //                        break;
1104                   //                      }
1105                   //                    }
1106                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1107                   //                  }
1108                   if (currentCharacter == '\\') {
1109                     int escapeSize = currentPosition;
1110                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1111                     //scanSingleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1112                     scanSingleQuotedEscapeCharacter();
1113                     escapeSize = currentPosition - escapeSize;
1114                     if (withoutUnicodePtr == 0) {
1115                       //buffer all the entries that have been left aside....
1116                       withoutUnicodePtr =
1117                         currentPosition - escapeSize - 1 - startPosition;
1118                       System.arraycopy(
1119                         source,
1120                         startPosition,
1121                         withoutUnicodeBuffer,
1122                         1,
1123                         withoutUnicodePtr);
1124                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1125                         currentCharacter;
1126                     } else { //overwrite the / in the buffer
1127                       withoutUnicodeBuffer[withoutUnicodePtr] =
1128                         currentCharacter;
1129                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1130                         withoutUnicodePtr--;
1131                       }
1132                     }
1133                   }
1134                   // consume next character
1135                   unicodeAsBackSlash = false;
1136                   if (((currentCharacter = source[currentPosition++]) == '\\')
1137                     && (source[currentPosition] == 'u')) {
1138                     getNextUnicodeChar();
1139                   } else {
1140                     if (withoutUnicodePtr != 0) {
1141                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1142                         currentCharacter;
1143                     }
1144                   }
1145
1146                 }
1147               } catch (IndexOutOfBoundsException e) {
1148                 throw new InvalidInputException(UNTERMINATED_STRING);
1149               } catch (InvalidInputException e) {
1150                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1151                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1152                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1153                     if (currentPosition + lookAhead == source.length)
1154                       break;
1155                     if (source[currentPosition + lookAhead] == '\n')
1156                       break;
1157                     if (source[currentPosition + lookAhead] == '\'') {
1158                       currentPosition += lookAhead + 1;
1159                       break;
1160                     }
1161                   }
1162
1163                 }
1164                 throw e; // rethrow
1165               }
1166               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1167                 if (currentLine == null) {
1168                   currentLine = new NLSLine();
1169                   lines.add(currentLine);
1170                 }
1171                 currentLine.add(
1172                   new StringLiteral(
1173                     getCurrentTokenSourceString(),
1174                     startPosition,
1175                     currentPosition - 1));
1176               }
1177               return TokenNameStringConstant;
1178             case '"' :
1179               try {
1180                 // consume next character
1181                 unicodeAsBackSlash = false;
1182                 if (((currentCharacter = source[currentPosition++]) == '\\')
1183                   && (source[currentPosition] == 'u')) {
1184                   getNextUnicodeChar();
1185                 } else {
1186                   if (withoutUnicodePtr != 0) {
1187                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1188                       currentCharacter;
1189                   }
1190                 }
1191
1192                 while (currentCharacter != '"') {
1193
1194                   /**** in PHP \r and \n are valid in string literals ****/
1195                   //                  if ((currentCharacter == '\n')
1196                   //                    || (currentCharacter == '\r')) {
1197                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1198                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1199                   //                      if (currentPosition + lookAhead == source.length)
1200                   //                        break;
1201                   //                      if (source[currentPosition + lookAhead] == '\n')
1202                   //                        break;
1203                   //                      if (source[currentPosition + lookAhead] == '\"') {
1204                   //                        currentPosition += lookAhead + 1;
1205                   //                        break;
1206                   //                      }
1207                   //                    }
1208                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1209                   //                  }
1210                   if (currentCharacter == '\\') {
1211                     int escapeSize = currentPosition;
1212                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1213                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1214                     scanDoubleQuotedEscapeCharacter();
1215                     escapeSize = currentPosition - escapeSize;
1216                     if (withoutUnicodePtr == 0) {
1217                       //buffer all the entries that have been left aside....
1218                       withoutUnicodePtr =
1219                         currentPosition - escapeSize - 1 - startPosition;
1220                       System.arraycopy(
1221                         source,
1222                         startPosition,
1223                         withoutUnicodeBuffer,
1224                         1,
1225                         withoutUnicodePtr);
1226                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1227                         currentCharacter;
1228                     } else { //overwrite the / in the buffer
1229                       withoutUnicodeBuffer[withoutUnicodePtr] =
1230                         currentCharacter;
1231                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1232                         withoutUnicodePtr--;
1233                       }
1234                     }
1235                   }
1236                   // consume next character
1237                   unicodeAsBackSlash = false;
1238                   if (((currentCharacter = source[currentPosition++]) == '\\')
1239                     && (source[currentPosition] == 'u')) {
1240                     getNextUnicodeChar();
1241                   } else {
1242                     if (withoutUnicodePtr != 0) {
1243                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1244                         currentCharacter;
1245                     }
1246                   }
1247
1248                 }
1249               } catch (IndexOutOfBoundsException e) {
1250                 throw new InvalidInputException(UNTERMINATED_STRING);
1251               } catch (InvalidInputException e) {
1252                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1253                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1254                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1255                     if (currentPosition + lookAhead == source.length)
1256                       break;
1257                     if (source[currentPosition + lookAhead] == '\n')
1258                       break;
1259                     if (source[currentPosition + lookAhead] == '\"') {
1260                       currentPosition += lookAhead + 1;
1261                       break;
1262                     }
1263                   }
1264
1265                 }
1266                 throw e; // rethrow
1267               }
1268               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1269                 if (currentLine == null) {
1270                   currentLine = new NLSLine();
1271                   lines.add(currentLine);
1272                 }
1273                 currentLine.add(
1274                   new StringLiteral(
1275                     getCurrentTokenSourceString(),
1276                     startPosition,
1277                     currentPosition - 1));
1278               }
1279               return TokenNameStringLiteral;
1280             case '`' :
1281               try {
1282                 // consume next character
1283                 unicodeAsBackSlash = false;
1284                 if (((currentCharacter = source[currentPosition++]) == '\\')
1285                   && (source[currentPosition] == 'u')) {
1286                   getNextUnicodeChar();
1287                 } else {
1288                   if (withoutUnicodePtr != 0) {
1289                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1290                       currentCharacter;
1291                   }
1292                 }
1293
1294                 while (currentCharacter != '`') {
1295
1296                   /**** in PHP \r and \n are valid in string literals ****/
1297                   //                if ((currentCharacter == '\n')
1298                   //                  || (currentCharacter == '\r')) {
1299                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1300                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1301                   //                    if (currentPosition + lookAhead == source.length)
1302                   //                      break;
1303                   //                    if (source[currentPosition + lookAhead] == '\n')
1304                   //                      break;
1305                   //                    if (source[currentPosition + lookAhead] == '\"') {
1306                   //                      currentPosition += lookAhead + 1;
1307                   //                      break;
1308                   //                    }
1309                   //                  }
1310                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1311                   //                }
1312                   if (currentCharacter == '\\') {
1313                     int escapeSize = currentPosition;
1314                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1315                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1316                     scanDoubleQuotedEscapeCharacter();
1317                     escapeSize = currentPosition - escapeSize;
1318                     if (withoutUnicodePtr == 0) {
1319                       //buffer all the entries that have been left aside....
1320                       withoutUnicodePtr =
1321                         currentPosition - escapeSize - 1 - startPosition;
1322                       System.arraycopy(
1323                         source,
1324                         startPosition,
1325                         withoutUnicodeBuffer,
1326                         1,
1327                         withoutUnicodePtr);
1328                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1329                         currentCharacter;
1330                     } else { //overwrite the / in the buffer
1331                       withoutUnicodeBuffer[withoutUnicodePtr] =
1332                         currentCharacter;
1333                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1334                         withoutUnicodePtr--;
1335                       }
1336                     }
1337                   }
1338                   // consume next character
1339                   unicodeAsBackSlash = false;
1340                   if (((currentCharacter = source[currentPosition++]) == '\\')
1341                     && (source[currentPosition] == 'u')) {
1342                     getNextUnicodeChar();
1343                   } else {
1344                     if (withoutUnicodePtr != 0) {
1345                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1346                         currentCharacter;
1347                     }
1348                   }
1349
1350                 }
1351               } catch (IndexOutOfBoundsException e) {
1352                 throw new InvalidInputException(UNTERMINATED_STRING);
1353               } catch (InvalidInputException e) {
1354                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1355                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1356                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1357                     if (currentPosition + lookAhead == source.length)
1358                       break;
1359                     if (source[currentPosition + lookAhead] == '\n')
1360                       break;
1361                     if (source[currentPosition + lookAhead] == '`') {
1362                       currentPosition += lookAhead + 1;
1363                       break;
1364                     }
1365                   }
1366
1367                 }
1368                 throw e; // rethrow
1369               }
1370               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1371                 if (currentLine == null) {
1372                   currentLine = new NLSLine();
1373                   lines.add(currentLine);
1374                 }
1375                 currentLine.add(
1376                   new StringLiteral(
1377                     getCurrentTokenSourceString(),
1378                     startPosition,
1379                     currentPosition - 1));
1380               }
1381               return TokenNameStringInterpolated;
1382             case '#' :
1383             case '/' :
1384               {
1385                 int test;
1386                 if ((currentCharacter == '#')
1387                   || (test = getNextChar('/', '*')) == 0) {
1388                   //line comment
1389                   int endPositionForLineComment = 0;
1390                   try { //get the next char
1391                     if (((currentCharacter = source[currentPosition++])
1392                       == '\\')
1393                       && (source[currentPosition] == 'u')) {
1394                       //-------------unicode traitement ------------
1395                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1396                       currentPosition++;
1397                       while (source[currentPosition] == 'u') {
1398                         currentPosition++;
1399                       }
1400                       if ((c1 =
1401                         Character.getNumericValue(source[currentPosition++]))
1402                         > 15
1403                         || c1 < 0
1404                         || (c2 =
1405                           Character.getNumericValue(source[currentPosition++]))
1406                           > 15
1407                         || c2 < 0
1408                         || (c3 =
1409                           Character.getNumericValue(source[currentPosition++]))
1410                           > 15
1411                         || c3 < 0
1412                         || (c4 =
1413                           Character.getNumericValue(source[currentPosition++]))
1414                           > 15
1415                         || c4 < 0) {
1416                         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1417                       } else {
1418                         currentCharacter =
1419                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1420                       }
1421                     }
1422
1423                     //handle the \\u case manually into comment
1424                     if (currentCharacter == '\\') {
1425                       if (source[currentPosition] == '\\')
1426                         currentPosition++;
1427                     } //jump over the \\
1428                     boolean isUnicode = false;
1429                     while (currentCharacter != '\r'
1430                       && currentCharacter != '\n') {
1431                       if (currentCharacter == '?') {
1432                         if (getNextChar('>')) {
1433                           startPosition = currentPosition - 2;
1434                           phpMode = false;
1435                           return TokenNameStopPHP;
1436                         }
1437                       }
1438
1439                       //get the next char
1440                       isUnicode = false;
1441                       if (((currentCharacter = source[currentPosition++])
1442                         == '\\')
1443                         && (source[currentPosition] == 'u')) {
1444                         isUnicode = true;
1445                         //-------------unicode traitement ------------
1446                         int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1447                         currentPosition++;
1448                         while (source[currentPosition] == 'u') {
1449                           currentPosition++;
1450                         }
1451                         if ((c1 =
1452                           Character.getNumericValue(source[currentPosition++]))
1453                           > 15
1454                           || c1 < 0
1455                           || (c2 =
1456                             Character.getNumericValue(
1457                               source[currentPosition++]))
1458                             > 15
1459                           || c2 < 0
1460                           || (c3 =
1461                             Character.getNumericValue(
1462                               source[currentPosition++]))
1463                             > 15
1464                           || c3 < 0
1465                           || (c4 =
1466                             Character.getNumericValue(
1467                               source[currentPosition++]))
1468                             > 15
1469                           || c4 < 0) {
1470                           throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1471                         } else {
1472                           currentCharacter =
1473                             (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1474                         }
1475                       }
1476                       //handle the \\u case manually into comment
1477                       if (currentCharacter == '\\') {
1478                         if (source[currentPosition] == '\\')
1479                           currentPosition++;
1480                       } //jump over the \\
1481                     }
1482                     if (isUnicode) {
1483                       endPositionForLineComment = currentPosition - 6;
1484                     } else {
1485                       endPositionForLineComment = currentPosition - 1;
1486                     }
1487                     recordComment(false);
1488                     if ((currentCharacter == '\r')
1489                       || (currentCharacter == '\n')) {
1490                       checkNonExternalizeString();
1491                       if (recordLineSeparator) {
1492                         if (isUnicode) {
1493                           pushUnicodeLineSeparator();
1494                         } else {
1495                           pushLineSeparator();
1496                         }
1497                       } else {
1498                         currentLine = null;
1499                       }
1500                     }
1501                     if (tokenizeComments) {
1502                       if (!isUnicode) {
1503                         currentPosition = endPositionForLineComment;
1504                         // reset one character behind
1505                       }
1506                       return TokenNameCOMMENT_LINE;
1507                     }
1508                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1509                     if (tokenizeComments) {
1510                       currentPosition--;
1511                       // reset one character behind
1512                       return TokenNameCOMMENT_LINE;
1513                     }
1514                   }
1515                   break;
1516                 }
1517                 if (test > 0) {
1518                   //traditional and annotation comment
1519                   boolean isJavadoc = false, star = false;
1520                   // consume next character
1521                   unicodeAsBackSlash = false;
1522                   if (((currentCharacter = source[currentPosition++]) == '\\')
1523                     && (source[currentPosition] == 'u')) {
1524                     getNextUnicodeChar();
1525                   } else {
1526                     if (withoutUnicodePtr != 0) {
1527                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1528                         currentCharacter;
1529                     }
1530                   }
1531
1532                   if (currentCharacter == '*') {
1533                     isJavadoc = true;
1534                     star = true;
1535                   }
1536                   if ((currentCharacter == '\r')
1537                     || (currentCharacter == '\n')) {
1538                     checkNonExternalizeString();
1539                     if (recordLineSeparator) {
1540                       pushLineSeparator();
1541                     } else {
1542                       currentLine = null;
1543                     }
1544                   }
1545                   try { //get the next char
1546                     if (((currentCharacter = source[currentPosition++])
1547                       == '\\')
1548                       && (source[currentPosition] == 'u')) {
1549                       //-------------unicode traitement ------------
1550                       getNextUnicodeChar();
1551                     }
1552                     //handle the \\u case manually into comment
1553                     if (currentCharacter == '\\') {
1554                       if (source[currentPosition] == '\\')
1555                         currentPosition++;
1556                       //jump over the \\
1557                     }
1558                     // empty comment is not a javadoc /**/
1559                     if (currentCharacter == '/') {
1560                       isJavadoc = false;
1561                     }
1562                     //loop until end of comment */
1563                     while ((currentCharacter != '/') || (!star)) {
1564                       if ((currentCharacter == '\r')
1565                         || (currentCharacter == '\n')) {
1566                         checkNonExternalizeString();
1567                         if (recordLineSeparator) {
1568                           pushLineSeparator();
1569                         } else {
1570                           currentLine = null;
1571                         }
1572                       }
1573                       star = currentCharacter == '*';
1574                       //get next char
1575                       if (((currentCharacter = source[currentPosition++])
1576                         == '\\')
1577                         && (source[currentPosition] == 'u')) {
1578                         //-------------unicode traitement ------------
1579                         getNextUnicodeChar();
1580                       }
1581                       //handle the \\u case manually into comment
1582                       if (currentCharacter == '\\') {
1583                         if (source[currentPosition] == '\\')
1584                           currentPosition++;
1585                       } //jump over the \\
1586                     }
1587                     recordComment(isJavadoc);
1588                     if (tokenizeComments) {
1589                       if (isJavadoc)
1590                         return TokenNameCOMMENT_PHPDOC;
1591                       return TokenNameCOMMENT_BLOCK;
1592                     }
1593                   } catch (IndexOutOfBoundsException e) {
1594                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1595                   }
1596                   break;
1597                 }
1598                 if (getNextChar('='))
1599                   return TokenNameDIVIDE_EQUAL;
1600                 return TokenNameDIVIDE;
1601               }
1602             case '\u001a' :
1603               if (atEnd())
1604                 return TokenNameEOF;
1605               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1606               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1607
1608             default :
1609               if (currentCharacter == '$') {
1610                 while ((currentCharacter = source[currentPosition++]) == '$') {
1611                 }
1612                 if (currentCharacter == '{')
1613                   return TokenNameDOLLAR_LBRACE;
1614                 if (isPHPIdentifierStart(currentCharacter))
1615                   return scanIdentifierOrKeyword(true);
1616                 return TokenNameERROR;
1617               }
1618               if (isPHPIdentifierStart(currentCharacter))
1619                 return scanIdentifierOrKeyword(false);
1620               if (Character.isDigit(currentCharacter))
1621                 return scanNumber(false);
1622               return TokenNameERROR;
1623           }
1624         }
1625       } //-----------------end switch while try--------------------
1626       catch (IndexOutOfBoundsException e) {
1627       }
1628     }
1629     return TokenNameEOF;
1630   }
1631
1632   public final void getNextUnicodeChar()
1633     throws IndexOutOfBoundsException, InvalidInputException {
1634     //VOID
1635     //handle the case of unicode.
1636     //when a unicode appears then we must use a buffer that holds char internal values
1637     //At the end of this method currentCharacter holds the new visited char
1638     //and currentPosition points right next after it
1639
1640     //ALL getNextChar.... ARE OPTIMIZED COPIES
1641
1642     int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1643     currentPosition++;
1644     while (source[currentPosition] == 'u') {
1645       currentPosition++;
1646       unicodeSize++;
1647     }
1648
1649     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1650       || c1 < 0
1651       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1652       || c2 < 0
1653       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1654       || c3 < 0
1655       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1656       || c4 < 0) {
1657       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1658     } else {
1659       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1660       //need the unicode buffer
1661       if (withoutUnicodePtr == 0) {
1662         //buffer all the entries that have been left aside....
1663         withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1664         System.arraycopy(
1665           source,
1666           startPosition,
1667           withoutUnicodeBuffer,
1668           1,
1669           withoutUnicodePtr);
1670       }
1671       //fill the buffer with the char
1672       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1673     }
1674     unicodeAsBackSlash = currentCharacter == '\\';
1675   }
1676   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1677    */
1678   public final void jumpOverMethodBody() {
1679
1680     this.wasAcr = false;
1681     int found = 1;
1682     try {
1683       while (true) { //loop for jumping over comments
1684         // ---------Consume white space and handles startPosition---------
1685         boolean isWhiteSpace;
1686         do {
1687           startPosition = currentPosition;
1688           if (((currentCharacter = source[currentPosition++]) == '\\')
1689             && (source[currentPosition] == 'u')) {
1690             isWhiteSpace = jumpOverUnicodeWhiteSpace();
1691           } else {
1692             if (recordLineSeparator
1693               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1694               pushLineSeparator();
1695             isWhiteSpace = Character.isWhitespace(currentCharacter);
1696           }
1697         } while (isWhiteSpace);
1698
1699         // -------consume token until } is found---------
1700         switch (currentCharacter) {
1701           case '{' :
1702             found++;
1703             break;
1704           case '}' :
1705             found--;
1706             if (found == 0)
1707               return;
1708             break;
1709           case '\'' :
1710             {
1711               boolean test;
1712               test = getNextChar('\\');
1713               if (test) {
1714                 try {
1715                   scanDoubleQuotedEscapeCharacter();
1716                 } catch (InvalidInputException ex) {
1717                 };
1718               } else {
1719                 try { // consume next character
1720                   unicodeAsBackSlash = false;
1721                   if (((currentCharacter = source[currentPosition++]) == '\\')
1722                     && (source[currentPosition] == 'u')) {
1723                     getNextUnicodeChar();
1724                   } else {
1725                     if (withoutUnicodePtr != 0) {
1726                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1727                         currentCharacter;
1728                     }
1729                   }
1730                 } catch (InvalidInputException ex) {
1731                 };
1732               }
1733               getNextChar('\'');
1734               break;
1735             }
1736           case '"' :
1737             try {
1738               try { // consume next character
1739                 unicodeAsBackSlash = false;
1740                 if (((currentCharacter = source[currentPosition++]) == '\\')
1741                   && (source[currentPosition] == 'u')) {
1742                   getNextUnicodeChar();
1743                 } else {
1744                   if (withoutUnicodePtr != 0) {
1745                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1746                       currentCharacter;
1747                   }
1748                 }
1749               } catch (InvalidInputException ex) {
1750               };
1751               while (currentCharacter != '"') {
1752                 if (currentCharacter == '\r') {
1753                   if (source[currentPosition] == '\n')
1754                     currentPosition++;
1755                   break;
1756                   // the string cannot go further that the line
1757                 }
1758                 if (currentCharacter == '\n') {
1759                   break;
1760                   // the string cannot go further that the line
1761                 }
1762                 if (currentCharacter == '\\') {
1763                   try {
1764                     scanDoubleQuotedEscapeCharacter();
1765                   } catch (InvalidInputException ex) {
1766                   };
1767                 }
1768                 try { // consume next character
1769                   unicodeAsBackSlash = false;
1770                   if (((currentCharacter = source[currentPosition++]) == '\\')
1771                     && (source[currentPosition] == 'u')) {
1772                     getNextUnicodeChar();
1773                   } else {
1774                     if (withoutUnicodePtr != 0) {
1775                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1776                         currentCharacter;
1777                     }
1778                   }
1779                 } catch (InvalidInputException ex) {
1780                 };
1781               }
1782             } catch (IndexOutOfBoundsException e) {
1783               return;
1784             }
1785             break;
1786           case '/' :
1787             {
1788               int test;
1789               if ((test = getNextChar('/', '*')) == 0) {
1790                 //line comment
1791                 try {
1792                   //get the next char
1793                   if (((currentCharacter = source[currentPosition++]) == '\\')
1794                     && (source[currentPosition] == 'u')) {
1795                     //-------------unicode traitement ------------
1796                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1797                     currentPosition++;
1798                     while (source[currentPosition] == 'u') {
1799                       currentPosition++;
1800                     }
1801                     if ((c1 =
1802                       Character.getNumericValue(source[currentPosition++]))
1803                       > 15
1804                       || c1 < 0
1805                       || (c2 =
1806                         Character.getNumericValue(source[currentPosition++]))
1807                         > 15
1808                       || c2 < 0
1809                       || (c3 =
1810                         Character.getNumericValue(source[currentPosition++]))
1811                         > 15
1812                       || c3 < 0
1813                       || (c4 =
1814                         Character.getNumericValue(source[currentPosition++]))
1815                         > 15
1816                       || c4 < 0) {
1817                       //error don't care of the value
1818                       currentCharacter = 'A';
1819                     } //something different from \n and \r
1820                     else {
1821                       currentCharacter =
1822                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1823                     }
1824                   }
1825
1826                   while (currentCharacter != '\r'
1827                     && currentCharacter != '\n') {
1828                     //get the next char
1829                     if (((currentCharacter = source[currentPosition++])
1830                       == '\\')
1831                       && (source[currentPosition] == 'u')) {
1832                       //-------------unicode traitement ------------
1833                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1834                       currentPosition++;
1835                       while (source[currentPosition] == 'u') {
1836                         currentPosition++;
1837                       }
1838                       if ((c1 =
1839                         Character.getNumericValue(source[currentPosition++]))
1840                         > 15
1841                         || c1 < 0
1842                         || (c2 =
1843                           Character.getNumericValue(source[currentPosition++]))
1844                           > 15
1845                         || c2 < 0
1846                         || (c3 =
1847                           Character.getNumericValue(source[currentPosition++]))
1848                           > 15
1849                         || c3 < 0
1850                         || (c4 =
1851                           Character.getNumericValue(source[currentPosition++]))
1852                           > 15
1853                         || c4 < 0) {
1854                         //error don't care of the value
1855                         currentCharacter = 'A';
1856                       } //something different from \n and \r
1857                       else {
1858                         currentCharacter =
1859                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1860                       }
1861                     }
1862                   }
1863                   if (recordLineSeparator
1864                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1865                     pushLineSeparator();
1866                 } catch (IndexOutOfBoundsException e) {
1867                 } //an eof will them be generated
1868                 break;
1869               }
1870               if (test > 0) {
1871                 //traditional and annotation comment
1872                 boolean star = false;
1873                 try { // consume next character
1874                   unicodeAsBackSlash = false;
1875                   if (((currentCharacter = source[currentPosition++]) == '\\')
1876                     && (source[currentPosition] == 'u')) {
1877                     getNextUnicodeChar();
1878                   } else {
1879                     if (withoutUnicodePtr != 0) {
1880                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1881                         currentCharacter;
1882                     }
1883                   };
1884                 } catch (InvalidInputException ex) {
1885                 };
1886                 if (currentCharacter == '*') {
1887                   star = true;
1888                 }
1889                 if (recordLineSeparator
1890                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1891                   pushLineSeparator();
1892                 try { //get the next char
1893                   if (((currentCharacter = source[currentPosition++]) == '\\')
1894                     && (source[currentPosition] == 'u')) {
1895                     //-------------unicode traitement ------------
1896                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1897                     currentPosition++;
1898                     while (source[currentPosition] == 'u') {
1899                       currentPosition++;
1900                     }
1901                     if ((c1 =
1902                       Character.getNumericValue(source[currentPosition++]))
1903                       > 15
1904                       || c1 < 0
1905                       || (c2 =
1906                         Character.getNumericValue(source[currentPosition++]))
1907                         > 15
1908                       || c2 < 0
1909                       || (c3 =
1910                         Character.getNumericValue(source[currentPosition++]))
1911                         > 15
1912                       || c3 < 0
1913                       || (c4 =
1914                         Character.getNumericValue(source[currentPosition++]))
1915                         > 15
1916                       || c4 < 0) {
1917                       //error don't care of the value
1918                       currentCharacter = 'A';
1919                     } //something different from * and /
1920                     else {
1921                       currentCharacter =
1922                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1923                     }
1924                   }
1925                   //loop until end of comment */
1926                   while ((currentCharacter != '/') || (!star)) {
1927                     if (recordLineSeparator
1928                       && ((currentCharacter == '\r')
1929                         || (currentCharacter == '\n')))
1930                       pushLineSeparator();
1931                     star = currentCharacter == '*';
1932                     //get next char
1933                     if (((currentCharacter = source[currentPosition++])
1934                       == '\\')
1935                       && (source[currentPosition] == 'u')) {
1936                       //-------------unicode traitement ------------
1937                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1938                       currentPosition++;
1939                       while (source[currentPosition] == 'u') {
1940                         currentPosition++;
1941                       }
1942                       if ((c1 =
1943                         Character.getNumericValue(source[currentPosition++]))
1944                         > 15
1945                         || c1 < 0
1946                         || (c2 =
1947                           Character.getNumericValue(source[currentPosition++]))
1948                           > 15
1949                         || c2 < 0
1950                         || (c3 =
1951                           Character.getNumericValue(source[currentPosition++]))
1952                           > 15
1953                         || c3 < 0
1954                         || (c4 =
1955                           Character.getNumericValue(source[currentPosition++]))
1956                           > 15
1957                         || c4 < 0) {
1958                         //error don't care of the value
1959                         currentCharacter = 'A';
1960                       } //something different from * and /
1961                       else {
1962                         currentCharacter =
1963                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1964                       }
1965                     }
1966                   }
1967                 } catch (IndexOutOfBoundsException e) {
1968                   return;
1969                 }
1970                 break;
1971               }
1972               break;
1973             }
1974
1975           default :
1976             if (isPHPIdentifierStart(currentCharacter)
1977               || currentCharacter == '$') {
1978               try {
1979                 scanIdentifierOrKeyword((currentCharacter == '$'));
1980               } catch (InvalidInputException ex) {
1981               };
1982               break;
1983             }
1984             if (Character.isDigit(currentCharacter)) {
1985               try {
1986                 scanNumber(false);
1987               } catch (InvalidInputException ex) {
1988               };
1989               break;
1990             }
1991         }
1992       }
1993       //-----------------end switch while try--------------------
1994     } catch (IndexOutOfBoundsException e) {
1995     } catch (InvalidInputException e) {
1996     }
1997     return;
1998   }
1999   public final boolean jumpOverUnicodeWhiteSpace()
2000     throws InvalidInputException {
2001     //BOOLEAN
2002     //handle the case of unicode. Jump over the next whiteSpace
2003     //making startPosition pointing on the next available char
2004     //On false, the currentCharacter is filled up with a potential
2005     //correct char
2006
2007     try {
2008       this.wasAcr = false;
2009       int c1, c2, c3, c4;
2010       int unicodeSize = 6;
2011       currentPosition++;
2012       while (source[currentPosition] == 'u') {
2013         currentPosition++;
2014         unicodeSize++;
2015       }
2016
2017       if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2018         || c1 < 0)
2019         || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2020           || c2 < 0)
2021         || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2022           || c3 < 0)
2023         || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2024           || c4 < 0)) {
2025         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2026       }
2027
2028       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2029       if (recordLineSeparator
2030         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2031         pushLineSeparator();
2032       if (Character.isWhitespace(currentCharacter))
2033         return true;
2034
2035       //buffer the new char which is not a white space
2036       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2037       //withoutUnicodePtr == 1 is true here
2038       return false;
2039     } catch (IndexOutOfBoundsException e) {
2040       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2041     }
2042   }
2043   public final int[] getLineEnds() {
2044     //return a bounded copy of this.lineEnds
2045
2046     int[] copy;
2047     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2048     return copy;
2049   }
2050
2051   public char[] getSource() {
2052     return this.source;
2053   }
2054   final char[] optimizedCurrentTokenSource1() {
2055     //return always the same char[] build only once
2056
2057     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2058     char charOne = source[startPosition];
2059     switch (charOne) {
2060       case 'a' :
2061         return charArray_a;
2062       case 'b' :
2063         return charArray_b;
2064       case 'c' :
2065         return charArray_c;
2066       case 'd' :
2067         return charArray_d;
2068       case 'e' :
2069         return charArray_e;
2070       case 'f' :
2071         return charArray_f;
2072       case 'g' :
2073         return charArray_g;
2074       case 'h' :
2075         return charArray_h;
2076       case 'i' :
2077         return charArray_i;
2078       case 'j' :
2079         return charArray_j;
2080       case 'k' :
2081         return charArray_k;
2082       case 'l' :
2083         return charArray_l;
2084       case 'm' :
2085         return charArray_m;
2086       case 'n' :
2087         return charArray_n;
2088       case 'o' :
2089         return charArray_o;
2090       case 'p' :
2091         return charArray_p;
2092       case 'q' :
2093         return charArray_q;
2094       case 'r' :
2095         return charArray_r;
2096       case 's' :
2097         return charArray_s;
2098       case 't' :
2099         return charArray_t;
2100       case 'u' :
2101         return charArray_u;
2102       case 'v' :
2103         return charArray_v;
2104       case 'w' :
2105         return charArray_w;
2106       case 'x' :
2107         return charArray_x;
2108       case 'y' :
2109         return charArray_y;
2110       case 'z' :
2111         return charArray_z;
2112       default :
2113         return new char[] { charOne };
2114     }
2115   }
2116
2117   final char[] optimizedCurrentTokenSource2() {
2118     //try to return the same char[] build only once
2119
2120     char c0, c1;
2121     int hash =
2122       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2123         % TableSize;
2124     char[][] table = charArray_length[0][hash];
2125     int i = newEntry2;
2126     while (++i < InternalTableSize) {
2127       char[] charArray = table[i];
2128       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2129         return charArray;
2130     }
2131     //---------other side---------
2132     i = -1;
2133     int max = newEntry2;
2134     while (++i <= max) {
2135       char[] charArray = table[i];
2136       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2137         return charArray;
2138     }
2139     //--------add the entry-------
2140     if (++max >= InternalTableSize)
2141       max = 0;
2142     char[] r;
2143     table[max] = (r = new char[] { c0, c1 });
2144     newEntry2 = max;
2145     return r;
2146   }
2147
2148   final char[] optimizedCurrentTokenSource3() {
2149     //try to return the same char[] build only once
2150
2151     char c0, c1, c2;
2152     int hash =
2153       (((c0 = source[startPosition]) << 12)
2154         + ((c1 = source[startPosition + 1]) << 6)
2155         + (c2 = source[startPosition + 2]))
2156         % TableSize;
2157     char[][] table = charArray_length[1][hash];
2158     int i = newEntry3;
2159     while (++i < InternalTableSize) {
2160       char[] charArray = table[i];
2161       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2162         return charArray;
2163     }
2164     //---------other side---------
2165     i = -1;
2166     int max = newEntry3;
2167     while (++i <= max) {
2168       char[] charArray = table[i];
2169       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2170         return charArray;
2171     }
2172     //--------add the entry-------
2173     if (++max >= InternalTableSize)
2174       max = 0;
2175     char[] r;
2176     table[max] = (r = new char[] { c0, c1, c2 });
2177     newEntry3 = max;
2178     return r;
2179   }
2180
2181   final char[] optimizedCurrentTokenSource4() {
2182     //try to return the same char[] build only once
2183
2184     char c0, c1, c2, c3;
2185     long hash =
2186       ((((long) (c0 = source[startPosition])) << 18)
2187         + ((c1 = source[startPosition + 1]) << 12)
2188         + ((c2 = source[startPosition + 2]) << 6)
2189         + (c3 = source[startPosition + 3]))
2190         % TableSize;
2191     char[][] table = charArray_length[2][(int) hash];
2192     int i = newEntry4;
2193     while (++i < InternalTableSize) {
2194       char[] charArray = table[i];
2195       if ((c0 == charArray[0])
2196         && (c1 == charArray[1])
2197         && (c2 == charArray[2])
2198         && (c3 == charArray[3]))
2199         return charArray;
2200     }
2201     //---------other side---------
2202     i = -1;
2203     int max = newEntry4;
2204     while (++i <= max) {
2205       char[] charArray = table[i];
2206       if ((c0 == charArray[0])
2207         && (c1 == charArray[1])
2208         && (c2 == charArray[2])
2209         && (c3 == charArray[3]))
2210         return charArray;
2211     }
2212     //--------add the entry-------
2213     if (++max >= InternalTableSize)
2214       max = 0;
2215     char[] r;
2216     table[max] = (r = new char[] { c0, c1, c2, c3 });
2217     newEntry4 = max;
2218     return r;
2219
2220   }
2221
2222   final char[] optimizedCurrentTokenSource5() {
2223     //try to return the same char[] build only once
2224
2225     char c0, c1, c2, c3, c4;
2226     long hash =
2227       ((((long) (c0 = source[startPosition])) << 24)
2228         + (((long) (c1 = source[startPosition + 1])) << 18)
2229         + ((c2 = source[startPosition + 2]) << 12)
2230         + ((c3 = source[startPosition + 3]) << 6)
2231         + (c4 = source[startPosition + 4]))
2232         % TableSize;
2233     char[][] table = charArray_length[3][(int) hash];
2234     int i = newEntry5;
2235     while (++i < InternalTableSize) {
2236       char[] charArray = table[i];
2237       if ((c0 == charArray[0])
2238         && (c1 == charArray[1])
2239         && (c2 == charArray[2])
2240         && (c3 == charArray[3])
2241         && (c4 == charArray[4]))
2242         return charArray;
2243     }
2244     //---------other side---------
2245     i = -1;
2246     int max = newEntry5;
2247     while (++i <= max) {
2248       char[] charArray = table[i];
2249       if ((c0 == charArray[0])
2250         && (c1 == charArray[1])
2251         && (c2 == charArray[2])
2252         && (c3 == charArray[3])
2253         && (c4 == charArray[4]))
2254         return charArray;
2255     }
2256     //--------add the entry-------
2257     if (++max >= InternalTableSize)
2258       max = 0;
2259     char[] r;
2260     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2261     newEntry5 = max;
2262     return r;
2263
2264   }
2265
2266   final char[] optimizedCurrentTokenSource6() {
2267     //try to return the same char[] build only once
2268
2269     char c0, c1, c2, c3, c4, c5;
2270     long hash =
2271       ((((long) (c0 = source[startPosition])) << 32)
2272         + (((long) (c1 = source[startPosition + 1])) << 24)
2273         + (((long) (c2 = source[startPosition + 2])) << 18)
2274         + ((c3 = source[startPosition + 3]) << 12)
2275         + ((c4 = source[startPosition + 4]) << 6)
2276         + (c5 = source[startPosition + 5]))
2277         % TableSize;
2278     char[][] table = charArray_length[4][(int) hash];
2279     int i = newEntry6;
2280     while (++i < InternalTableSize) {
2281       char[] charArray = table[i];
2282       if ((c0 == charArray[0])
2283         && (c1 == charArray[1])
2284         && (c2 == charArray[2])
2285         && (c3 == charArray[3])
2286         && (c4 == charArray[4])
2287         && (c5 == charArray[5]))
2288         return charArray;
2289     }
2290     //---------other side---------
2291     i = -1;
2292     int max = newEntry6;
2293     while (++i <= max) {
2294       char[] charArray = table[i];
2295       if ((c0 == charArray[0])
2296         && (c1 == charArray[1])
2297         && (c2 == charArray[2])
2298         && (c3 == charArray[3])
2299         && (c4 == charArray[4])
2300         && (c5 == charArray[5]))
2301         return charArray;
2302     }
2303     //--------add the entry-------
2304     if (++max >= InternalTableSize)
2305       max = 0;
2306     char[] r;
2307     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2308     newEntry6 = max;
2309     return r;
2310   }
2311
2312   public final void pushLineSeparator() throws InvalidInputException {
2313     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2314     final int INCREMENT = 250;
2315
2316     if (this.checkNonExternalizedStringLiterals) {
2317       // reinitialize the current line for non externalize strings purpose
2318       currentLine = null;
2319     }
2320     //currentCharacter is at position currentPosition-1
2321
2322     // cr 000D
2323     if (currentCharacter == '\r') {
2324       int separatorPos = currentPosition - 1;
2325       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2326         return;
2327       //System.out.println("CR-" + separatorPos);
2328       try {
2329         lineEnds[++linePtr] = separatorPos;
2330       } catch (IndexOutOfBoundsException e) {
2331         //linePtr value is correct
2332         int oldLength = lineEnds.length;
2333         int[] old = lineEnds;
2334         lineEnds = new int[oldLength + INCREMENT];
2335         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2336         lineEnds[linePtr] = separatorPos;
2337       }
2338       // look-ahead for merged cr+lf
2339       try {
2340         if (source[currentPosition] == '\n') {
2341           //System.out.println("look-ahead LF-" + currentPosition);
2342           lineEnds[linePtr] = currentPosition;
2343           currentPosition++;
2344           wasAcr = false;
2345         } else {
2346           wasAcr = true;
2347         }
2348       } catch (IndexOutOfBoundsException e) {
2349         wasAcr = true;
2350       }
2351     } else {
2352       // lf 000A
2353       if (currentCharacter == '\n') {
2354         //must merge eventual cr followed by lf
2355         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2356           //System.out.println("merge LF-" + (currentPosition - 1));
2357           lineEnds[linePtr] = currentPosition - 1;
2358         } else {
2359           int separatorPos = currentPosition - 1;
2360           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2361             return;
2362           // System.out.println("LF-" + separatorPos);
2363           try {
2364             lineEnds[++linePtr] = separatorPos;
2365           } catch (IndexOutOfBoundsException e) {
2366             //linePtr value is correct
2367             int oldLength = lineEnds.length;
2368             int[] old = lineEnds;
2369             lineEnds = new int[oldLength + INCREMENT];
2370             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2371             lineEnds[linePtr] = separatorPos;
2372           }
2373         }
2374         wasAcr = false;
2375       }
2376     }
2377   }
2378   public final void pushUnicodeLineSeparator() {
2379     // isUnicode means that the \r or \n has been read as a unicode character
2380
2381     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2382
2383     final int INCREMENT = 250;
2384     //currentCharacter is at position currentPosition-1
2385
2386     if (this.checkNonExternalizedStringLiterals) {
2387       // reinitialize the current line for non externalize strings purpose
2388       currentLine = null;
2389     }
2390
2391     // cr 000D
2392     if (currentCharacter == '\r') {
2393       int separatorPos = currentPosition - 6;
2394       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2395         return;
2396       //System.out.println("CR-" + separatorPos);
2397       try {
2398         lineEnds[++linePtr] = separatorPos;
2399       } catch (IndexOutOfBoundsException e) {
2400         //linePtr value is correct
2401         int oldLength = lineEnds.length;
2402         int[] old = lineEnds;
2403         lineEnds = new int[oldLength + INCREMENT];
2404         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2405         lineEnds[linePtr] = separatorPos;
2406       }
2407       // look-ahead for merged cr+lf
2408       if (source[currentPosition] == '\n') {
2409         //System.out.println("look-ahead LF-" + currentPosition);
2410         lineEnds[linePtr] = currentPosition;
2411         currentPosition++;
2412         wasAcr = false;
2413       } else {
2414         wasAcr = true;
2415       }
2416     } else {
2417       // lf 000A
2418       if (currentCharacter == '\n') {
2419         //must merge eventual cr followed by lf
2420         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2421           //System.out.println("merge LF-" + (currentPosition - 1));
2422           lineEnds[linePtr] = currentPosition - 6;
2423         } else {
2424           int separatorPos = currentPosition - 6;
2425           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2426             return;
2427           // System.out.println("LF-" + separatorPos);
2428           try {
2429             lineEnds[++linePtr] = separatorPos;
2430           } catch (IndexOutOfBoundsException e) {
2431             //linePtr value is correct
2432             int oldLength = lineEnds.length;
2433             int[] old = lineEnds;
2434             lineEnds = new int[oldLength + INCREMENT];
2435             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2436             lineEnds[linePtr] = separatorPos;
2437           }
2438         }
2439         wasAcr = false;
2440       }
2441     }
2442   }
2443   public final void recordComment(boolean isJavadoc) {
2444
2445     // a new annotation comment is recorded
2446     try {
2447       commentStops[++commentPtr] =
2448         isJavadoc ? currentPosition : -currentPosition;
2449     } catch (IndexOutOfBoundsException e) {
2450       int oldStackLength = commentStops.length;
2451       int[] oldStack = commentStops;
2452       commentStops = new int[oldStackLength + 30];
2453       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2454       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2455       //grows the positions buffers too
2456       int[] old = commentStarts;
2457       commentStarts = new int[oldStackLength + 30];
2458       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2459     }
2460
2461     //the buffer is of a correct size here
2462     commentStarts[commentPtr] = startPosition;
2463   }
2464   public void resetTo(int begin, int end) {
2465     //reset the scanner to a given position where it may rescan again
2466
2467     diet = false;
2468     initialPosition = startPosition = currentPosition = begin;
2469     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2470     commentPtr = -1; // reset comment stack
2471   }
2472
2473   public final void scanSingleQuotedEscapeCharacter()
2474     throws InvalidInputException {
2475     // the string with "\\u" is a legal string of two chars \ and u
2476     //thus we use a direct access to the source (for regular cases).
2477
2478     if (unicodeAsBackSlash) {
2479       // consume next character
2480       unicodeAsBackSlash = false;
2481       if (((currentCharacter = source[currentPosition++]) == '\\')
2482         && (source[currentPosition] == 'u')) {
2483         getNextUnicodeChar();
2484       } else {
2485         if (withoutUnicodePtr != 0) {
2486           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2487         }
2488       }
2489     } else
2490       currentCharacter = source[currentPosition++];
2491     switch (currentCharacter) {
2492       case '\'' :
2493         currentCharacter = '\'';
2494         break;
2495       case '\\' :
2496         currentCharacter = '\\';
2497         break;
2498       default :
2499         currentCharacter = '\\';
2500         currentPosition--;
2501     }
2502   }
2503
2504   public final void scanDoubleQuotedEscapeCharacter()
2505     throws InvalidInputException {
2506     // the string with "\\u" is a legal string of two chars \ and u
2507     //thus we use a direct access to the source (for regular cases).
2508
2509     if (unicodeAsBackSlash) {
2510       // consume next character
2511       unicodeAsBackSlash = false;
2512       if (((currentCharacter = source[currentPosition++]) == '\\')
2513         && (source[currentPosition] == 'u')) {
2514         getNextUnicodeChar();
2515       } else {
2516         if (withoutUnicodePtr != 0) {
2517           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2518         }
2519       }
2520     } else
2521       currentCharacter = source[currentPosition++];
2522     switch (currentCharacter) {
2523       //      case 'b' :
2524       //        currentCharacter = '\b';
2525       //        break;
2526       case 't' :
2527         currentCharacter = '\t';
2528         break;
2529       case 'n' :
2530         currentCharacter = '\n';
2531         break;
2532         //      case 'f' :
2533         //        currentCharacter = '\f';
2534         //        break;
2535       case 'r' :
2536         currentCharacter = '\r';
2537         break;
2538       case '\"' :
2539         currentCharacter = '\"';
2540         break;
2541       case '\'' :
2542         currentCharacter = '\'';
2543         break;
2544       case '\\' :
2545         currentCharacter = '\\';
2546         break;
2547       case '$' :
2548         currentCharacter = '$';
2549         break;
2550       default :
2551         // -----------octal escape--------------
2552         // OctalDigit
2553         // OctalDigit OctalDigit
2554         // ZeroToThree OctalDigit OctalDigit
2555
2556         int number = Character.getNumericValue(currentCharacter);
2557         if (number >= 0 && number <= 7) {
2558           boolean zeroToThreeNot = number > 3;
2559           if (Character
2560             .isDigit(currentCharacter = source[currentPosition++])) {
2561             int digit = Character.getNumericValue(currentCharacter);
2562             if (digit >= 0 && digit <= 7) {
2563               number = (number * 8) + digit;
2564               if (Character
2565                 .isDigit(currentCharacter = source[currentPosition++])) {
2566                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2567                   currentPosition--;
2568                 } else {
2569                   digit = Character.getNumericValue(currentCharacter);
2570                   if (digit >= 0 && digit <= 7) {
2571                     // has read \ZeroToThree OctalDigit OctalDigit
2572                     number = (number * 8) + digit;
2573                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2574                     currentPosition--;
2575                   }
2576                 }
2577               } else { // has read \OctalDigit NonDigit--> ignore last character
2578                 currentPosition--;
2579               }
2580             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2581               currentPosition--;
2582             }
2583           } else { // has read \OctalDigit --> ignore last character
2584             currentPosition--;
2585           }
2586           if (number > 255)
2587             throw new InvalidInputException(INVALID_ESCAPE);
2588           currentCharacter = (char) number;
2589         }
2590         //else
2591         //     throw new InvalidInputException(INVALID_ESCAPE);
2592     }
2593   }
2594
2595   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2596   //    return scanIdentifierOrKeyword( false );
2597   //  }
2598
2599   public int scanIdentifierOrKeyword(boolean isVariable)
2600     throws InvalidInputException {
2601     //test keywords
2602
2603     //first dispatch on the first char.
2604     //then the length. If there are several
2605     //keywors with the same length AND the same first char, then do another
2606     //disptach on the second char :-)...cool....but fast !
2607
2608     useAssertAsAnIndentifier = false;
2609
2610     while (getNextCharAsJavaIdentifierPart()) {
2611     };
2612
2613     if (isVariable) {
2614       return TokenNameVariable;
2615     }
2616     int index, length;
2617     char[] data;
2618     char firstLetter;
2619     if (withoutUnicodePtr == 0)
2620
2621       //quick test on length == 1 but not on length > 12 while most identifier
2622       //have a length which is <= 12...but there are lots of identifier with
2623       //only one char....
2624
2625       {
2626       if ((length = currentPosition - startPosition) == 1)
2627         return TokenNameIdentifier;
2628       //  data = source;
2629       data = new char[length];
2630       index = startPosition;
2631       for (int i = 0; i < length; i++) {
2632         data[i] = Character.toLowerCase(source[index + i]);
2633       }
2634       index = 0;
2635     } else {
2636       if ((length = withoutUnicodePtr) == 1)
2637         return TokenNameIdentifier;
2638       // data = withoutUnicodeBuffer;
2639       data = new char[withoutUnicodeBuffer.length];
2640       for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2641         data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2642       }
2643       index = 1;
2644     }
2645
2646     firstLetter = data[index];
2647     switch (firstLetter) {
2648
2649       case 'a' : // as and array
2650         switch (length) {
2651           case 2 : //as
2652             if ((data[++index] == 's')) {
2653               return TokenNameas;
2654             } else {
2655               return TokenNameIdentifier;
2656             }
2657           case 3 : //and
2658             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2659               return TokenNameAND;
2660             } else {
2661               return TokenNameIdentifier;
2662             }
2663             //          case 5 :
2664             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2665             //              return TokenNamearray;
2666             //            else
2667             //              return TokenNameIdentifier;
2668           default :
2669             return TokenNameIdentifier;
2670         }
2671       case 'b' : //break
2672         switch (length) {
2673           case 5 :
2674             if ((data[++index] == 'r')
2675               && (data[++index] == 'e')
2676               && (data[++index] == 'a')
2677               && (data[++index] == 'k'))
2678               return TokenNamebreak;
2679             else
2680               return TokenNameIdentifier;
2681           default :
2682             return TokenNameIdentifier;
2683         }
2684
2685       case 'c' : //case class continue
2686         switch (length) {
2687           case 4 :
2688             if ((data[++index] == 'a')
2689               && (data[++index] == 's')
2690               && (data[++index] == 'e'))
2691               return TokenNamecase;
2692             else
2693               return TokenNameIdentifier;
2694           case 5 :
2695             if ((data[++index] == 'l')
2696               && (data[++index] == 'a')
2697               && (data[++index] == 's')
2698               && (data[++index] == 's'))
2699               return TokenNameclass;
2700             else
2701               return TokenNameIdentifier;
2702           case 8 :
2703             if ((data[++index] == 'o')
2704               && (data[++index] == 'n')
2705               && (data[++index] == 't')
2706               && (data[++index] == 'i')
2707               && (data[++index] == 'n')
2708               && (data[++index] == 'u')
2709               && (data[++index] == 'e'))
2710               return TokenNamecontinue;
2711             else
2712               return TokenNameIdentifier;
2713           default :
2714             return TokenNameIdentifier;
2715         }
2716
2717       case 'd' : //define default do
2718         switch (length) {
2719           case 2 :
2720             if ((data[++index] == 'o'))
2721               return TokenNamedo;
2722             else
2723               return TokenNameIdentifier;
2724           case 6 :
2725             if ((data[++index] == 'e')
2726               && (data[++index] == 'f')
2727               && (data[++index] == 'i')
2728               && (data[++index] == 'n')
2729               && (data[++index] == 'e'))
2730               return TokenNamedefine;
2731             else
2732               return TokenNameIdentifier;
2733           case 7 :
2734             if ((data[++index] == 'e')
2735               && (data[++index] == 'f')
2736               && (data[++index] == 'a')
2737               && (data[++index] == 'u')
2738               && (data[++index] == 'l')
2739               && (data[++index] == 't'))
2740               return TokenNamedefault;
2741             else
2742               return TokenNameIdentifier;
2743           default :
2744             return TokenNameIdentifier;
2745         }
2746       case 'e' : //echo else elseif extends
2747         switch (length) {
2748           case 4 :
2749             if ((data[++index] == 'c')
2750               && (data[++index] == 'h')
2751               && (data[++index] == 'o'))
2752               return TokenNameecho;
2753             else if (
2754               (data[index] == 'l')
2755                 && (data[++index] == 's')
2756                 && (data[++index] == 'e'))
2757               return TokenNameelse;
2758             else
2759               return TokenNameIdentifier;
2760           case 5 : // endif
2761             if ((data[++index] == 'n')
2762               && (data[++index] == 'd')
2763               && (data[++index] == 'i')
2764               && (data[++index] == 'f'))
2765               return TokenNameendif;
2766             else
2767               return TokenNameIdentifier;
2768           case 6 : // endfor
2769             if ((data[++index] == 'n')
2770               && (data[++index] == 'd')
2771               && (data[++index] == 'f')
2772               && (data[++index] == 'o')
2773               && (data[++index] == 'r'))
2774               return TokenNameendfor;
2775             else if (
2776               (data[index] == 'l')
2777                 && (data[++index] == 's')
2778                 && (data[++index] == 'e')
2779                 && (data[++index] == 'i')
2780                 && (data[++index] == 'f'))
2781               return TokenNameelseif;
2782             else
2783               return TokenNameIdentifier;
2784           case 7 :
2785             if ((data[++index] == 'x')
2786               && (data[++index] == 't')
2787               && (data[++index] == 'e')
2788               && (data[++index] == 'n')
2789               && (data[++index] == 'd')
2790               && (data[++index] == 's'))
2791               return TokenNameextends;
2792             else
2793               return TokenNameIdentifier;
2794           case 8 : // endwhile
2795             if ((data[++index] == 'n')
2796               && (data[++index] == 'd')
2797               && (data[++index] == 'w')
2798               && (data[++index] == 'h')
2799               && (data[++index] == 'i')
2800               && (data[++index] == 'l')
2801               && (data[++index] == 'e'))
2802               return TokenNameendwhile;
2803             else
2804               return TokenNameIdentifier;
2805           case 9 : // endswitch
2806             if ((data[++index] == 'n')
2807               && (data[++index] == 'd')
2808               && (data[++index] == 's')
2809               && (data[++index] == 'w')
2810               && (data[++index] == 'i')
2811               && (data[++index] == 't')
2812               && (data[++index] == 'c')
2813               && (data[++index] == 'h'))
2814               return TokenNameendswitch;
2815             else
2816               return TokenNameIdentifier;
2817           case 10 : // endforeach
2818             if ((data[++index] == 'n')
2819               && (data[++index] == 'd')
2820               && (data[++index] == 'f')
2821               && (data[++index] == 'o')
2822               && (data[++index] == 'r')
2823               && (data[++index] == 'e')
2824               && (data[++index] == 'a')
2825               && (data[++index] == 'c')
2826               && (data[++index] == 'h'))
2827               return TokenNameendforeach;
2828             else
2829               return TokenNameIdentifier;
2830
2831           default :
2832             return TokenNameIdentifier;
2833         }
2834
2835       case 'f' : //for false function
2836         switch (length) {
2837           case 3 :
2838             if ((data[++index] == 'o') && (data[++index] == 'r'))
2839               return TokenNamefor;
2840             else
2841               return TokenNameIdentifier;
2842           case 5 :
2843             if ((data[++index] == 'a')
2844               && (data[++index] == 'l')
2845               && (data[++index] == 's')
2846               && (data[++index] == 'e'))
2847               return TokenNamefalse;
2848             else
2849               return TokenNameIdentifier;
2850           case 7 : // function
2851             if ((data[++index] == 'o')
2852               && (data[++index] == 'r')
2853               && (data[++index] == 'e')
2854               && (data[++index] == 'a')
2855               && (data[++index] == 'c')
2856               && (data[++index] == 'h'))
2857               return TokenNameforeach;
2858             else
2859               return TokenNameIdentifier;
2860           case 8 : // function
2861             if ((data[++index] == 'u')
2862               && (data[++index] == 'n')
2863               && (data[++index] == 'c')
2864               && (data[++index] == 't')
2865               && (data[++index] == 'i')
2866               && (data[++index] == 'o')
2867               && (data[++index] == 'n'))
2868               return TokenNamefunction;
2869             else
2870               return TokenNameIdentifier;
2871           default :
2872             return TokenNameIdentifier;
2873         }
2874       case 'g' : //global
2875         if (length == 6) {
2876           if ((data[++index] == 'l')
2877             && (data[++index] == 'o')
2878             && (data[++index] == 'b')
2879             && (data[++index] == 'a')
2880             && (data[++index] == 'l')) {
2881             return TokenNameglobal;
2882           }
2883         }
2884         return TokenNameIdentifier;
2885
2886       case 'i' : //if int
2887         switch (length) {
2888           case 2 :
2889             if (data[++index] == 'f')
2890               return TokenNameif;
2891             else
2892               return TokenNameIdentifier;
2893             //          case 3 :
2894             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2895             //              return TokenNameint;
2896             //            else
2897             //              return TokenNameIdentifier;
2898           case 7 :
2899             if ((data[++index] == 'n')
2900               && (data[++index] == 'c')
2901               && (data[++index] == 'l')
2902               && (data[++index] == 'u')
2903               && (data[++index] == 'd')
2904               && (data[++index] == 'e'))
2905               return TokenNameinclude;
2906             else
2907               return TokenNameIdentifier;
2908           case 12 :
2909             if ((data[++index] == 'n')
2910               && (data[++index] == 'c')
2911               && (data[++index] == 'l')
2912               && (data[++index] == 'u')
2913               && (data[++index] == 'd')
2914               && (data[++index] == 'e')
2915               && (data[++index] == '_')
2916               && (data[++index] == 'o')
2917               && (data[++index] == 'n')
2918               && (data[++index] == 'c')
2919               && (data[++index] == 'e'))
2920               return TokenNameinclude_once;
2921             else
2922               return TokenNameIdentifier;
2923           default :
2924             return TokenNameIdentifier;
2925         }
2926
2927       case 'l' : //list
2928         if (length == 4) {
2929           if ((data[++index] == 'i')
2930             && (data[++index] == 's')
2931             && (data[++index] == 't')) {
2932             return TokenNamelist;
2933           }
2934         }
2935         return TokenNameIdentifier;
2936
2937       case 'n' : // new null
2938         switch (length) {
2939           case 3 :
2940             if ((data[++index] == 'e') && (data[++index] == 'w'))
2941               return TokenNamenew;
2942             else
2943               return TokenNameIdentifier;
2944           case 4 :
2945             if ((data[++index] == 'u')
2946               && (data[++index] == 'l')
2947               && (data[++index] == 'l'))
2948               return TokenNamenull;
2949             else
2950               return TokenNameIdentifier;
2951
2952           default :
2953             return TokenNameIdentifier;
2954         }
2955       case 'o' : // or old_function
2956         if (length == 2) {
2957           if (data[++index] == 'r') {
2958             return TokenNameOR;
2959           }
2960         }
2961         //        if (length == 12) {
2962         //          if ((data[++index] == 'l')
2963         //            && (data[++index] == 'd')
2964         //            && (data[++index] == '_')
2965         //            && (data[++index] == 'f')
2966         //            && (data[++index] == 'u')
2967         //            && (data[++index] == 'n')
2968         //            && (data[++index] == 'c')
2969         //            && (data[++index] == 't')
2970         //            && (data[++index] == 'i')
2971         //            && (data[++index] == 'o')
2972         //            && (data[++index] == 'n')) {
2973         //            return TokenNameold_function;
2974         //          }
2975         //        }
2976         return TokenNameIdentifier;
2977
2978       case 'p' : // print
2979         if (length == 5) {
2980           if ((data[++index] == 'r')
2981             && (data[++index] == 'i')
2982             && (data[++index] == 'n')
2983             && (data[++index] == 't')) {
2984             return TokenNameprint;
2985           }
2986         }
2987         return TokenNameIdentifier;
2988       case 'r' : //return require require_once
2989         if (length == 6) {
2990           if ((data[++index] == 'e')
2991             && (data[++index] == 't')
2992             && (data[++index] == 'u')
2993             && (data[++index] == 'r')
2994             && (data[++index] == 'n')) {
2995             return TokenNamereturn;
2996           }
2997         } else if (length == 7) {
2998           if ((data[++index] == 'e')
2999             && (data[++index] == 'q')
3000             && (data[++index] == 'u')
3001             && (data[++index] == 'i')
3002             && (data[++index] == 'r')
3003             && (data[++index] == 'e')) {
3004             return TokenNamerequire;
3005           }
3006         } else if (length == 12) {
3007           if ((data[++index] == 'e')
3008             && (data[++index] == 'q')
3009             && (data[++index] == 'u')
3010             && (data[++index] == 'i')
3011             && (data[++index] == 'r')
3012             && (data[++index] == 'e')
3013             && (data[++index] == '_')
3014             && (data[++index] == 'o')
3015             && (data[++index] == 'n')
3016             && (data[++index] == 'c')
3017             && (data[++index] == 'e')) {
3018             return TokenNamerequire_once;
3019           }
3020         } else
3021           return TokenNameIdentifier;
3022
3023       case 's' : //static switch
3024         switch (length) {
3025           case 6 :
3026             if (data[++index] == 't')
3027               if ((data[++index] == 'a')
3028                 && (data[++index] == 't')
3029                 && (data[++index] == 'i')
3030                 && (data[++index] == 'c')) {
3031                 return TokenNamestatic;
3032               } else
3033                 return TokenNameIdentifier;
3034             else if (
3035               (data[index] == 'w')
3036                 && (data[++index] == 'i')
3037                 && (data[++index] == 't')
3038                 && (data[++index] == 'c')
3039                 && (data[++index] == 'h'))
3040               return TokenNameswitch;
3041             else
3042               return TokenNameIdentifier;
3043           default :
3044             return TokenNameIdentifier;
3045         }
3046
3047       case 't' : // true
3048         switch (length) {
3049
3050           case 4 :
3051             if ((data[++index] == 'r')
3052               && (data[++index] == 'u')
3053               && (data[++index] == 'e'))
3054               return TokenNametrue;
3055             else
3056               return TokenNameIdentifier;
3057             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3058             //              return TokenNamethis;
3059
3060           default :
3061             return TokenNameIdentifier;
3062         }
3063
3064       case 'v' : //var
3065         switch (length) {
3066           case 3 :
3067             if ((data[++index] == 'a') && (data[++index] == 'r'))
3068               return TokenNamevar;
3069             else
3070               return TokenNameIdentifier;
3071
3072           default :
3073             return TokenNameIdentifier;
3074         }
3075
3076       case 'w' : //while
3077         switch (length) {
3078           case 5 :
3079             if ((data[++index] == 'h')
3080               && (data[++index] == 'i')
3081               && (data[++index] == 'l')
3082               && (data[++index] == 'e'))
3083               return TokenNamewhile;
3084             else
3085               return TokenNameIdentifier;
3086             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3087             //return TokenNamewidefp ;
3088             //else
3089             //return TokenNameIdentifier;
3090           default :
3091             return TokenNameIdentifier;
3092         }
3093
3094       case 'x' : //xor
3095         switch (length) {
3096           case 3 :
3097             if ((data[++index] == 'o') && (data[++index] == 'r'))
3098               return TokenNameXOR;
3099             else
3100               return TokenNameIdentifier;
3101
3102           default :
3103             return TokenNameIdentifier;
3104         }
3105       default :
3106         return TokenNameIdentifier;
3107     }
3108   }
3109   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3110
3111     //when entering this method the currentCharacter is the firt
3112     //digit of the number , i.e. it may be preceeded by a . when
3113     //dotPrefix is true
3114
3115     boolean floating = dotPrefix;
3116     if ((!dotPrefix) && (currentCharacter == '0')) {
3117       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3118         //force the first char of the hexa number do exist...
3119         // consume next character
3120         unicodeAsBackSlash = false;
3121         if (((currentCharacter = source[currentPosition++]) == '\\')
3122           && (source[currentPosition] == 'u')) {
3123           getNextUnicodeChar();
3124         } else {
3125           if (withoutUnicodePtr != 0) {
3126             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3127           }
3128         }
3129         if (Character.digit(currentCharacter, 16) == -1)
3130           throw new InvalidInputException(INVALID_HEXA);
3131         //---end forcing--
3132         while (getNextCharAsDigit(16)) {
3133         };
3134         //        if (getNextChar('l', 'L') >= 0)
3135         //          return TokenNameLongLiteral;
3136         //        else
3137         return TokenNameIntegerLiteral;
3138       }
3139
3140       //there is x or X in the number
3141       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3142       if (getNextCharAsDigit()) {
3143         //-------------potential octal-----------------
3144         while (getNextCharAsDigit()) {
3145         };
3146
3147         //        if (getNextChar('l', 'L') >= 0) {
3148         //          return TokenNameLongLiteral;
3149         //        }
3150         //
3151         //        if (getNextChar('f', 'F') >= 0) {
3152         //          return TokenNameFloatingPointLiteral;
3153         //        }
3154
3155         if (getNextChar('d', 'D') >= 0) {
3156           return TokenNameDoubleLiteral;
3157         } else { //make the distinction between octal and float ....
3158           if (getNextChar('.')) { //bingo ! ....
3159             while (getNextCharAsDigit()) {
3160             };
3161             if (getNextChar('e', 'E') >= 0) {
3162               // consume next character
3163               unicodeAsBackSlash = false;
3164               if (((currentCharacter = source[currentPosition++]) == '\\')
3165                 && (source[currentPosition] == 'u')) {
3166                 getNextUnicodeChar();
3167               } else {
3168                 if (withoutUnicodePtr != 0) {
3169                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3170                 }
3171               }
3172
3173               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3174                 // consume next character
3175                 unicodeAsBackSlash = false;
3176                 if (((currentCharacter = source[currentPosition++]) == '\\')
3177                   && (source[currentPosition] == 'u')) {
3178                   getNextUnicodeChar();
3179                 } else {
3180                   if (withoutUnicodePtr != 0) {
3181                     withoutUnicodeBuffer[++withoutUnicodePtr] =
3182                       currentCharacter;
3183                   }
3184                 }
3185               }
3186               if (!Character.isDigit(currentCharacter))
3187                 throw new InvalidInputException(INVALID_FLOAT);
3188               while (getNextCharAsDigit()) {
3189               };
3190             }
3191             //            if (getNextChar('f', 'F') >= 0)
3192             //              return TokenNameFloatingPointLiteral;
3193             getNextChar('d', 'D'); //jump over potential d or D
3194             return TokenNameDoubleLiteral;
3195           } else {
3196             return TokenNameIntegerLiteral;
3197           }
3198         }
3199       } else {
3200         /* carry on */
3201       }
3202     }
3203
3204     while (getNextCharAsDigit()) {
3205     };
3206
3207     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3208     //      return TokenNameLongLiteral;
3209
3210     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3211       while (getNextCharAsDigit()) {
3212       };
3213       floating = true;
3214     }
3215
3216     //if floating is true both exponant and suffix may be optional
3217
3218     if (getNextChar('e', 'E') >= 0) {
3219       floating = true;
3220       // consume next character
3221       unicodeAsBackSlash = false;
3222       if (((currentCharacter = source[currentPosition++]) == '\\')
3223         && (source[currentPosition] == 'u')) {
3224         getNextUnicodeChar();
3225       } else {
3226         if (withoutUnicodePtr != 0) {
3227           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3228         }
3229       }
3230
3231       if ((currentCharacter == '-')
3232         || (currentCharacter == '+')) { // consume next character
3233         unicodeAsBackSlash = false;
3234         if (((currentCharacter = source[currentPosition++]) == '\\')
3235           && (source[currentPosition] == 'u')) {
3236           getNextUnicodeChar();
3237         } else {
3238           if (withoutUnicodePtr != 0) {
3239             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3240           }
3241         }
3242       }
3243       if (!Character.isDigit(currentCharacter))
3244         throw new InvalidInputException(INVALID_FLOAT);
3245       while (getNextCharAsDigit()) {
3246       };
3247     }
3248
3249     if (getNextChar('d', 'D') >= 0)
3250       return TokenNameDoubleLiteral;
3251     //    if (getNextChar('f', 'F') >= 0)
3252     //      return TokenNameFloatingPointLiteral;
3253
3254     //the long flag has been tested before
3255
3256     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3257   }
3258   /**
3259    * Search the line number corresponding to a specific position
3260    *
3261    */
3262   public final int getLineNumber(int position) {
3263
3264     if (lineEnds == null)
3265       return 1;
3266     int length = linePtr + 1;
3267     if (length == 0)
3268       return 1;
3269     int g = 0, d = length - 1;
3270     int m = 0;
3271     while (g <= d) {
3272       m = (g + d) / 2;
3273       if (position < lineEnds[m]) {
3274         d = m - 1;
3275       } else if (position > lineEnds[m]) {
3276         g = m + 1;
3277       } else {
3278         return m + 1;
3279       }
3280     }
3281     if (position < lineEnds[m]) {
3282       return m + 1;
3283     }
3284     return m + 2;
3285   }
3286
3287   public void setPHPMode(boolean mode) {
3288     phpMode = mode;
3289   }
3290
3291   public final void setSource(char[] source) {
3292     //the source-buffer is set to sourceString
3293
3294     if (source == null) {
3295       this.source = new char[0];
3296     } else {
3297       this.source = source;
3298     }
3299     startPosition = -1;
3300     initialPosition = currentPosition = 0;
3301     containsAssertKeyword = false;
3302     withoutUnicodeBuffer = new char[this.source.length];
3303
3304   }
3305
3306   public String toString() {
3307     if (startPosition == source.length)
3308       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3309     if (currentPosition > source.length)
3310       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3311
3312     char front[] = new char[startPosition];
3313     System.arraycopy(source, 0, front, 0, startPosition);
3314
3315     int middleLength = (currentPosition - 1) - startPosition + 1;
3316     char middle[];
3317     if (middleLength > -1) {
3318       middle = new char[middleLength];
3319       System.arraycopy(source, startPosition, middle, 0, middleLength);
3320     } else {
3321       middle = new char[0];
3322     }
3323
3324     char end[] = new char[source.length - (currentPosition - 1)];
3325     System.arraycopy(
3326       source,
3327       (currentPosition - 1) + 1,
3328       end,
3329       0,
3330       source.length - (currentPosition - 1) - 1);
3331
3332     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3333     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3334     + new String(end);
3335   }
3336   public final String toStringAction(int act) {
3337     switch (act) {
3338       case TokenNameERROR :
3339         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3340       case TokenNameStopPHP :
3341         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3342       case TokenNameIdentifier :
3343         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3344       case TokenNameVariable :
3345         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3346       case TokenNameas :
3347         return "as"; //$NON-NLS-1$
3348       case TokenNamebreak :
3349         return "break"; //$NON-NLS-1$
3350       case TokenNamecase :
3351         return "case"; //$NON-NLS-1$
3352       case TokenNameclass :
3353         return "class"; //$NON-NLS-1$
3354       case TokenNamecontinue :
3355         return "continue"; //$NON-NLS-1$
3356       case TokenNamedefault :
3357         return "default"; //$NON-NLS-1$
3358       case TokenNamedefine :
3359         return "define"; //$NON-NLS-1$
3360       case TokenNamedo :
3361         return "do"; //$NON-NLS-1$
3362       case TokenNameecho :
3363         return "echo"; //$NON-NLS-1$
3364       case TokenNameelse :
3365         return "else"; //$NON-NLS-1$
3366       case TokenNameelseif :
3367         return "elseif"; //$NON-NLS-1$
3368       case TokenNameendfor :
3369         return "endfor"; //$NON-NLS-1$
3370       case TokenNameendforeach :
3371         return "endforeach"; //$NON-NLS-1$
3372       case TokenNameendif :
3373         return "endif"; //$NON-NLS-1$
3374       case TokenNameendswitch :
3375         return "endswitch"; //$NON-NLS-1$
3376       case TokenNameendwhile :
3377         return "endwhile"; //$NON-NLS-1$
3378       case TokenNameextends :
3379         return "extends"; //$NON-NLS-1$
3380       case TokenNamefalse :
3381         return "false"; //$NON-NLS-1$
3382       case TokenNamefor :
3383         return "for"; //$NON-NLS-1$
3384       case TokenNameforeach :
3385         return "foreach"; //$NON-NLS-1$
3386       case TokenNamefunction :
3387         return "function"; //$NON-NLS-1$
3388       case TokenNameglobal :
3389         return "global"; //$NON-NLS-1$
3390       case TokenNameif :
3391         return "if"; //$NON-NLS-1$
3392       case TokenNameinclude :
3393         return "include"; //$NON-NLS-1$
3394       case TokenNameinclude_once :
3395         return "include_once"; //$NON-NLS-1$
3396       case TokenNamelist :
3397         return "list"; //$NON-NLS-1$
3398       case TokenNamenew :
3399         return "new"; //$NON-NLS-1$
3400       case TokenNamenull :
3401         return "null"; //$NON-NLS-1$
3402       case TokenNameprint :
3403         return "print"; //$NON-NLS-1$
3404       case TokenNamerequire :
3405         return "require"; //$NON-NLS-1$
3406       case TokenNamerequire_once :
3407         return "require_once"; //$NON-NLS-1$
3408       case TokenNamereturn :
3409         return "return"; //$NON-NLS-1$
3410       case TokenNamestatic :
3411         return "static"; //$NON-NLS-1$
3412       case TokenNameswitch :
3413         return "switch"; //$NON-NLS-1$
3414       case TokenNametrue :
3415         return "true"; //$NON-NLS-1$
3416       case TokenNamevar :
3417         return "var"; //$NON-NLS-1$
3418       case TokenNamewhile :
3419         return "while"; //$NON-NLS-1$
3420       case TokenNameIntegerLiteral :
3421         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422       case TokenNameDoubleLiteral :
3423         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3424       case TokenNameStringLiteral :
3425         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3426       case TokenNameStringConstant :
3427         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3428       case TokenNameStringInterpolated :
3429         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3430       case TokenNameHEREDOC :
3431         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3432
3433       case TokenNamePLUS_PLUS :
3434         return "++"; //$NON-NLS-1$
3435       case TokenNameMINUS_MINUS :
3436         return "--"; //$NON-NLS-1$
3437       case TokenNameEQUAL_EQUAL :
3438         return "=="; //$NON-NLS-1$
3439       case TokenNameEQUAL_GREATER :
3440         return "=>"; //$NON-NLS-1$
3441       case TokenNameLESS_EQUAL :
3442         return "<="; //$NON-NLS-1$
3443       case TokenNameGREATER_EQUAL :
3444         return ">="; //$NON-NLS-1$
3445       case TokenNameNOT_EQUAL :
3446         return "!="; //$NON-NLS-1$
3447       case TokenNameLEFT_SHIFT :
3448         return "<<"; //$NON-NLS-1$
3449       case TokenNameRIGHT_SHIFT :
3450         return ">>"; //$NON-NLS-1$
3451       case TokenNamePLUS_EQUAL :
3452         return "+="; //$NON-NLS-1$
3453       case TokenNameMINUS_EQUAL :
3454         return "-="; //$NON-NLS-1$
3455       case TokenNameMULTIPLY_EQUAL :
3456         return "*="; //$NON-NLS-1$
3457       case TokenNameDIVIDE_EQUAL :
3458         return "/="; //$NON-NLS-1$
3459       case TokenNameAND_EQUAL :
3460         return "&="; //$NON-NLS-1$
3461       case TokenNameOR_EQUAL :
3462         return "|="; //$NON-NLS-1$
3463       case TokenNameXOR_EQUAL :
3464         return "^="; //$NON-NLS-1$
3465       case TokenNameREMAINDER_EQUAL :
3466         return "%="; //$NON-NLS-1$
3467       case TokenNameLEFT_SHIFT_EQUAL :
3468         return "<<="; //$NON-NLS-1$
3469       case TokenNameRIGHT_SHIFT_EQUAL :
3470         return ">>="; //$NON-NLS-1$
3471       case TokenNameOR_OR :
3472         return "||"; //$NON-NLS-1$
3473       case TokenNameAND_AND :
3474         return "&&"; //$NON-NLS-1$
3475       case TokenNamePLUS :
3476         return "+"; //$NON-NLS-1$
3477       case TokenNameMINUS :
3478         return "-"; //$NON-NLS-1$
3479       case TokenNameMINUS_GREATER :
3480         return "->";
3481       case TokenNameNOT :
3482         return "!"; //$NON-NLS-1$
3483       case TokenNameREMAINDER :
3484         return "%"; //$NON-NLS-1$
3485       case TokenNameXOR :
3486         return "^"; //$NON-NLS-1$
3487       case TokenNameAND :
3488         return "&"; //$NON-NLS-1$
3489       case TokenNameMULTIPLY :
3490         return "*"; //$NON-NLS-1$
3491       case TokenNameOR :
3492         return "|"; //$NON-NLS-1$
3493       case TokenNameTWIDDLE :
3494         return "~"; //$NON-NLS-1$
3495       case TokenNameTWIDDLE_EQUAL :
3496         return "~="; //$NON-NLS-1$
3497       case TokenNameDIVIDE :
3498         return "/"; //$NON-NLS-1$
3499       case TokenNameGREATER :
3500         return ">"; //$NON-NLS-1$
3501       case TokenNameLESS :
3502         return "<"; //$NON-NLS-1$
3503       case TokenNameLPAREN :
3504         return "("; //$NON-NLS-1$
3505       case TokenNameRPAREN :
3506         return ")"; //$NON-NLS-1$
3507       case TokenNameLBRACE :
3508         return "{"; //$NON-NLS-1$
3509       case TokenNameRBRACE :
3510         return "}"; //$NON-NLS-1$
3511       case TokenNameLBRACKET :
3512         return "["; //$NON-NLS-1$
3513       case TokenNameRBRACKET :
3514         return "]"; //$NON-NLS-1$
3515       case TokenNameSEMICOLON :
3516         return ";"; //$NON-NLS-1$
3517       case TokenNameQUESTION :
3518         return "?"; //$NON-NLS-1$
3519       case TokenNameCOLON :
3520         return ":"; //$NON-NLS-1$
3521       case TokenNameCOMMA :
3522         return ","; //$NON-NLS-1$
3523       case TokenNameDOT :
3524         return "."; //$NON-NLS-1$
3525       case TokenNameEQUAL :
3526         return "="; //$NON-NLS-1$
3527       case TokenNameAT :
3528         return "@";
3529       case TokenNameDOLLAR_LBRACE :
3530         return "${";
3531       case TokenNameEOF :
3532         return "EOF"; //$NON-NLS-1$
3533       default :
3534         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3535     }
3536   }
3537
/**
 * Creates a scanner with assert mode switched off; every other setting is
 * passed on unchanged to the four-argument constructor.
 *
 * @param tokenizeComments value for the tokenizeComments setting
 * @param tokenizeWhiteSpace value for the tokenizeWhiteSpace setting
 * @param checkNonExternalizedStringLiterals value for the
 *        checkNonExternalizedStringLiterals setting
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
3548
/**
 * Creates a scanner and stores its configuration flags. eofPosition starts
 * at Integer.MAX_VALUE.
 *
 * @param tokenizeComments stored in the tokenizeComments field
 * @param tokenizeWhiteSpace stored in the tokenizeWhiteSpace field
 * @param checkNonExternalizedStringLiterals stored in the
 *        checkNonExternalizedStringLiterals field
 * @param assertMode stored in the assertMode field
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals, boolean assertMode) {
  // configuration flags
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // no end limit until one is set explicitly
  this.eofPosition = Integer.MAX_VALUE;
}
3561
3562   private void checkNonExternalizeString() throws InvalidInputException {
3563     if (currentLine == null)
3564       return;
3565     parseTags(currentLine);
3566   }
3567
3568   private void parseTags(NLSLine line) throws InvalidInputException {
3569     String s = new String(getCurrentTokenSource());
3570     int pos = s.indexOf(TAG_PREFIX);
3571     int lineLength = line.size();
3572     while (pos != -1) {
3573       int start = pos + TAG_PREFIX_LENGTH;
3574       int end = s.indexOf(TAG_POSTFIX, start);
3575       String index = s.substring(start, end);
3576       int i = 0;
3577       try {
3578         i = Integer.parseInt(index) - 1;
3579         // Tags are one based not zero based.
3580       } catch (NumberFormatException e) {
3581         i = -1; // we don't want to consider this as a valid NLS tag
3582       }
3583       if (line.exists(i)) {
3584         line.set(i, null);
3585       }
3586       pos = s.indexOf(TAG_PREFIX, start);
3587     }
3588
3589     this.nonNLSStrings = new StringLiteral[lineLength];
3590     int nonNLSCounter = 0;
3591     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3592       StringLiteral literal = (StringLiteral) iterator.next();
3593       if (literal != null) {
3594         this.nonNLSStrings[nonNLSCounter++] = literal;
3595       }
3596     }
3597     if (nonNLSCounter == 0) {
3598       this.nonNLSStrings = null;
3599       currentLine = null;
3600       return;
3601     }
3602     this.wasNonExternalizedStringLiteral = true;
3603     if (nonNLSCounter != lineLength) {
3604       System.arraycopy(
3605         this.nonNLSStrings,
3606         0,
3607         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3608         0,
3609         nonNLSCounter);
3610     }
3611     currentLine = null;
3612   }
3613 }