net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
  /* The APIs are:
   - getNextToken(), which returns the type of the current token
     (this value is not memorized by the scanner)
   - getCurrentTokenSource(), which provides the token's "REAL" source
     (i.e. all unicode escapes have been transformed into the correct char)
   - startPosition gives the start position of the token in the stream
   - currentPosition-1 gives the end position of the token in the stream
  */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
  50   //of a entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  91   //----------------optimized identifier managment------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray =
 120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 121   static final int TableSize = 30, InternalTableSize = 6;
 122   //30*6 = 180 entries
 123   public static final int OptimizedLength = 6;
 124   public /*static*/
 125   final char[][][][] charArray_length =
 126     new char[OptimizedLength][TableSize][InternalTableSize][];
 127   // support for detecting non-externalized string literals
 128   int currentLineNr = -1;
 129   int previousLineNr = -1;
 130   NLSLine currentLine = null;
 131   List lines = new ArrayList();
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 136   public StringLiteral[] nonNLSStrings = null;
 137   public boolean checkNonExternalizedStringLiterals = true;
 138   public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
 149   static int newEntry2 = 0,
 150     newEntry3 = 0,
 151     newEntry4 = 0,
 152     newEntry5 = 0,
 153     newEntry6 = 0;
 154
 155   public static final int RoundBracket = 0;
 156   public static final int SquareBracket = 1;
 157   public static final int CurlyBracket = 2;
 158   public static final int BracketKinds = 3;
 159   public Scanner() {
 160     this(false, false);
 161   }
 162   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 163     this(tokenizeComments, tokenizeWhiteSpace, false);
 164   }
 165   public final boolean atEnd() {
 166     // This code is not relevant if source is
 167     // Only a part of the real stream input
 168
 169     return source.length == currentPosition;
 170   }
 171   public char[] getCurrentIdentifierSource() {
 172     //return the token REAL source (aka unicodes are precomputed)
 173
 174     char[] result;
 175     if (withoutUnicodePtr != 0)
 176       //0 is used as a fast test flag so the real first char is in position 1
 177       System.arraycopy(
 178         withoutUnicodeBuffer,
 179         1,
 180         result = new char[withoutUnicodePtr],
 181         0,
 182         withoutUnicodePtr);
 183     else {
 184       int length = currentPosition - startPosition;
 185       switch (length) { // see OptimizedLength
 186         case 1 :
 187           return optimizedCurrentTokenSource1();
 188         case 2 :
 189           return optimizedCurrentTokenSource2();
 190         case 3 :
 191           return optimizedCurrentTokenSource3();
 192         case 4 :
 193           return optimizedCurrentTokenSource4();
 194         case 5 :
 195           return optimizedCurrentTokenSource5();
 196         case 6 :
 197           return optimizedCurrentTokenSource6();
 198       }
 199       //no optimization
 200       System.arraycopy(
 201         source,
 202         startPosition,
 203         result = new char[length],
 204         0,
 205         length);
 206     }
 207     return result;
 208   }
 209   public int getCurrentTokenEndPosition() {
 210     return this.currentPosition - 1;
 211   }
 212   public final char[] getCurrentTokenSource() {
 213     // Return the token REAL source (aka unicodes are precomputed)
 214
 215     char[] result;
 216     if (withoutUnicodePtr != 0)
 217       // 0 is used as a fast test flag so the real first char is in position 1
 218       System.arraycopy(
 219         withoutUnicodeBuffer,
 220         1,
 221         result = new char[withoutUnicodePtr],
 222         0,
 223         withoutUnicodePtr);
 224     else {
 225       int length;
 226       System.arraycopy(
 227         source,
 228         startPosition,
 229         result = new char[length = currentPosition - startPosition],
 230         0,
 231         length);
 232     }
 233     return result;
 234   }
 235
 236   public final char[] getCurrentTokenSource(int startPos) {
 237     // Return the token REAL source (aka unicodes are precomputed)
 238
 239     char[] result;
 240     if (withoutUnicodePtr != 0)
 241       // 0 is used as a fast test flag so the real first char is in position 1
 242       System.arraycopy(
 243         withoutUnicodeBuffer,
 244         1,
 245         result = new char[withoutUnicodePtr],
 246         0,
 247         withoutUnicodePtr);
 248     else {
 249       int length;
 250       System.arraycopy(
 251         source,
 252         startPos,
 253         result = new char[length = currentPosition - startPos],
 254         0,
 255         length);
 256     }
 257     return result;
 258   }
 259
 260   public final char[] getCurrentTokenSourceString() {
 261     //return the token REAL source (aka unicodes are precomputed).
 262     //REMOVE the two " that are at the beginning and the end.
 263
 264     char[] result;
 265     if (withoutUnicodePtr != 0)
 266       //0 is used as a fast test flag so the real first char is in position 1
 267       System.arraycopy(withoutUnicodeBuffer, 2,
 268       //2 is 1 (real start) + 1 (to jump over the ")
 269       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 270     else {
 271       int length;
 272       System.arraycopy(
 273         source,
 274         startPosition + 1,
 275         result = new char[length = currentPosition - startPosition - 2],
 276         0,
 277         length);
 278     }
 279     return result;
 280   }
 281   public int getCurrentTokenStartPosition() {
 282     return this.startPosition;
 283   }
 284   /*
 285    * Search the source position corresponding to the end of a given line number
 286    *
 287    * Line numbers are 1-based, and relative to the scanner initialPosition.
 288    * Character positions are 0-based.
 289    *
 290    * In case the given line number is inconsistent, answers -1.
 291    */
 292   public final int getLineEnd(int lineNumber) {
 293
 294     if (lineEnds == null)
 295       return -1;
 296     if (lineNumber >= lineEnds.length)
 297       return -1;
 298     if (lineNumber <= 0)
 299       return -1;
 300
 301     if (lineNumber == lineEnds.length - 1)
 302       return eofPosition;
 303     return lineEnds[lineNumber - 1];
 304     // next line start one character behind the lineEnd of the previous line
 305   }
 306   /**
 307    * Search the source position corresponding to the beginning of a given line number
 308    *
 309    * Line numbers are 1-based, and relative to the scanner initialPosition.
 310    * Character positions are 0-based.
 311    *
 312    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 313    *
 314    * In case the given line number is inconsistent, answers -1.
 315    */
 316   public final int getLineStart(int lineNumber) {
 317
 318     if (lineEnds == null)
 319       return -1;
 320     if (lineNumber >= lineEnds.length)
 321       return -1;
 322     if (lineNumber <= 0)
 323       return -1;
 324
 325     if (lineNumber == 1)
 326       return initialPosition;
 327     return lineEnds[lineNumber - 2] + 1;
 328     // next line start one character behind the lineEnd of the previous line
 329   }
 330   public final boolean getNextChar(char testedChar) {
 331     //BOOLEAN
 332     //handle the case of unicode.
 333     //when a unicode appears then we must use a buffer that holds char internal values
 334     //At the end of this method currentCharacter holds the new visited char
 335     //and currentPosition points right next after it
 336     //Both previous lines are true if the currentCharacter is == to the testedChar
 337     //On false, no side effect has occured.
 338
 339     //ALL getNextChar.... ARE OPTIMIZED COPIES
 340
 341     int temp = currentPosition;
 342     try {
 343       if (((currentCharacter = source[currentPosition++]) == '\\')
 344         && (source[currentPosition] == 'u')) {
 345         //-------------unicode traitement ------------
 346         int c1, c2, c3, c4;
 347         int unicodeSize = 6;
 348         currentPosition++;
 349         while (source[currentPosition] == 'u') {
 350           currentPosition++;
 351           unicodeSize++;
 352         }
 353
 354         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 355           || c1 < 0)
 356           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 357             || c2 < 0)
 358           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 359             || c3 < 0)
 360           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 361             || c4 < 0)) {
 362           currentPosition = temp;
 363           return false;
 364         }
 365
 366         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 367         if (currentCharacter != testedChar) {
 368           currentPosition = temp;
 369           return false;
 370         }
 371         unicodeAsBackSlash = currentCharacter == '\\';
 372
 373         //need the unicode buffer
 374         if (withoutUnicodePtr == 0) {
 375           //buffer all the entries that have been left aside....
 376           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 377           System.arraycopy(
 378             source,
 379             startPosition,
 380             withoutUnicodeBuffer,
 381             1,
 382             withoutUnicodePtr);
 383         }
 384         //fill the buffer with the char
 385         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 386         return true;
 387
 388       } //-------------end unicode traitement--------------
 389       else {
 390         if (currentCharacter != testedChar) {
 391           currentPosition = temp;
 392           return false;
 393         }
 394         unicodeAsBackSlash = false;
 395         if (withoutUnicodePtr != 0)
 396           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 397         return true;
 398       }
 399     } catch (IndexOutOfBoundsException e) {
 400       unicodeAsBackSlash = false;
 401       currentPosition = temp;
 402       return false;
 403     }
 404   }
 405   public final int getNextChar(char testedChar1, char testedChar2) {
 406     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 407     //test can be done with (x==0) for the first and (x>0) for the second
 408     //handle the case of unicode.
 409     //when a unicode appears then we must use a buffer that holds char internal values
 410     //At the end of this method currentCharacter holds the new visited char
 411     //and currentPosition points right next after it
 412     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 413     //On false, no side effect has occured.
 414
 415     //ALL getNextChar.... ARE OPTIMIZED COPIES
 416
 417     int temp = currentPosition;
 418     try {
 419       int result;
 420       if (((currentCharacter = source[currentPosition++]) == '\\')
 421         && (source[currentPosition] == 'u')) {
 422         //-------------unicode traitement ------------
 423         int c1, c2, c3, c4;
 424         int unicodeSize = 6;
 425         currentPosition++;
 426         while (source[currentPosition] == 'u') {
 427           currentPosition++;
 428           unicodeSize++;
 429         }
 430
 431         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 432           || c1 < 0)
 433           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 434             || c2 < 0)
 435           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 436             || c3 < 0)
 437           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 438             || c4 < 0)) {
 439           currentPosition = temp;
 440           return 2;
 441         }
 442
 443         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 444         if (currentCharacter == testedChar1)
 445           result = 0;
 446         else if (currentCharacter == testedChar2)
 447           result = 1;
 448         else {
 449           currentPosition = temp;
 450           return -1;
 451         }
 452
 453         //need the unicode buffer
 454         if (withoutUnicodePtr == 0) {
 455           //buffer all the entries that have been left aside....
 456           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 457           System.arraycopy(
 458             source,
 459             startPosition,
 460             withoutUnicodeBuffer,
 461             1,
 462             withoutUnicodePtr);
 463         }
 464         //fill the buffer with the char
 465         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 466         return result;
 467       } //-------------end unicode traitement--------------
 468       else {
 469         if (currentCharacter == testedChar1)
 470           result = 0;
 471         else if (currentCharacter == testedChar2)
 472           result = 1;
 473         else {
 474           currentPosition = temp;
 475           return -1;
 476         }
 477
 478         if (withoutUnicodePtr != 0)
 479           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 480         return result;
 481       }
 482     } catch (IndexOutOfBoundsException e) {
 483       currentPosition = temp;
 484       return -1;
 485     }
 486   }
 487   public final boolean getNextCharAsDigit() {
 488     //BOOLEAN
 489     //handle the case of unicode.
 490     //when a unicode appears then we must use a buffer that holds char internal values
 491     //At the end of this method currentCharacter holds the new visited char
 492     //and currentPosition points right next after it
 493     //Both previous lines are true if the currentCharacter is a digit
 494     //On false, no side effect has occured.
 495
 496     //ALL getNextChar.... ARE OPTIMIZED COPIES
 497
 498     int temp = currentPosition;
 499     try {
 500       if (((currentCharacter = source[currentPosition++]) == '\\')
 501         && (source[currentPosition] == 'u')) {
 502         //-------------unicode traitement ------------
 503         int c1, c2, c3, c4;
 504         int unicodeSize = 6;
 505         currentPosition++;
 506         while (source[currentPosition] == 'u') {
 507           currentPosition++;
 508           unicodeSize++;
 509         }
 510
 511         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 512           || c1 < 0)
 513           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 514             || c2 < 0)
 515           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 516             || c3 < 0)
 517           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 518             || c4 < 0)) {
 519           currentPosition = temp;
 520           return false;
 521         }
 522
 523         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 524         if (!Character.isDigit(currentCharacter)) {
 525           currentPosition = temp;
 526           return false;
 527         }
 528
 529         //need the unicode buffer
 530         if (withoutUnicodePtr == 0) {
 531           //buffer all the entries that have been left aside....
 532           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 533           System.arraycopy(
 534             source,
 535             startPosition,
 536             withoutUnicodeBuffer,
 537             1,
 538             withoutUnicodePtr);
 539         }
 540         //fill the buffer with the char
 541         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 542         return true;
 543       } //-------------end unicode traitement--------------
 544       else {
 545         if (!Character.isDigit(currentCharacter)) {
 546           currentPosition = temp;
 547           return false;
 548         }
 549         if (withoutUnicodePtr != 0)
 550           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 551         return true;
 552       }
 553     } catch (IndexOutOfBoundsException e) {
 554       currentPosition = temp;
 555       return false;
 556     }
 557   }
 558   public final boolean getNextCharAsDigit(int radix) {
 559     //BOOLEAN
 560     //handle the case of unicode.
 561     //when a unicode appears then we must use a buffer that holds char internal values
 562     //At the end of this method currentCharacter holds the new visited char
 563     //and currentPosition points right next after it
 564     //Both previous lines are true if the currentCharacter is a digit base on radix
 565     //On false, no side effect has occured.
 566
 567     //ALL getNextChar.... ARE OPTIMIZED COPIES
 568
 569     int temp = currentPosition;
 570     try {
 571       if (((currentCharacter = source[currentPosition++]) == '\\')
 572         && (source[currentPosition] == 'u')) {
 573         //-------------unicode traitement ------------
 574         int c1, c2, c3, c4;
 575         int unicodeSize = 6;
 576         currentPosition++;
 577         while (source[currentPosition] == 'u') {
 578           currentPosition++;
 579           unicodeSize++;
 580         }
 581
 582         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 583           || c1 < 0)
 584           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 585             || c2 < 0)
 586           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 587             || c3 < 0)
 588           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 589             || c4 < 0)) {
 590           currentPosition = temp;
 591           return false;
 592         }
 593
 594         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 595         if (Character.digit(currentCharacter, radix) == -1) {
 596           currentPosition = temp;
 597           return false;
 598         }
 599
 600         //need the unicode buffer
 601         if (withoutUnicodePtr == 0) {
 602           //buffer all the entries that have been left aside....
 603           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 604           System.arraycopy(
 605             source,
 606             startPosition,
 607             withoutUnicodeBuffer,
 608             1,
 609             withoutUnicodePtr);
 610         }
 611         //fill the buffer with the char
 612         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 613         return true;
 614       } //-------------end unicode traitement--------------
 615       else {
 616         if (Character.digit(currentCharacter, radix) == -1) {
 617           currentPosition = temp;
 618           return false;
 619         }
 620         if (withoutUnicodePtr != 0)
 621           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 622         return true;
 623       }
 624     } catch (IndexOutOfBoundsException e) {
 625       currentPosition = temp;
 626       return false;
 627     }
 628   }
 629   public boolean getNextCharAsJavaIdentifierPart() {
 630     //BOOLEAN
 631     //handle the case of unicode.
 632     //when a unicode appears then we must use a buffer that holds char internal values
 633     //At the end of this method currentCharacter holds the new visited char
 634     //and currentPosition points right next after it
 635     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 636     //On false, no side effect has occured.
 637
 638     //ALL getNextChar.... ARE OPTIMIZED COPIES
 639
 640     int temp = currentPosition;
 641     try {
 642       if (((currentCharacter = source[currentPosition++]) == '\\')
 643         && (source[currentPosition] == 'u')) {
 644         //-------------unicode traitement ------------
 645         int c1, c2, c3, c4;
 646         int unicodeSize = 6;
 647         currentPosition++;
 648         while (source[currentPosition] == 'u') {
 649           currentPosition++;
 650           unicodeSize++;
 651         }
 652
 653         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 654           || c1 < 0)
 655           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 656             || c2 < 0)
 657           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 658             || c3 < 0)
 659           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 660             || c4 < 0)) {
 661           currentPosition = temp;
 662           return false;
 663         }
 664
 665         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 666         if (!Character.isJavaIdentifierPart(currentCharacter)) {
 667           currentPosition = temp;
 668           return false;
 669         }
 670
 671         //need the unicode buffer
 672         if (withoutUnicodePtr == 0) {
 673           //buffer all the entries that have been left aside....
 674           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 675           System.arraycopy(
 676             source,
 677             startPosition,
 678             withoutUnicodeBuffer,
 679             1,
 680             withoutUnicodePtr);
 681         }
 682         //fill the buffer with the char
 683         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 684         return true;
 685       } //-------------end unicode traitement--------------
 686       else {
 687         if (!Character.isJavaIdentifierPart(currentCharacter)) {
 688           currentPosition = temp;
 689           return false;
 690         }
 691
 692         if (withoutUnicodePtr != 0)
 693           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 694         return true;
 695       }
 696     } catch (IndexOutOfBoundsException e) {
 697       currentPosition = temp;
 698       return false;
 699     }
 700   }
 701
 702   public int getNextToken() throws InvalidInputException {
 703     try {
 704       while (!phpMode) {
 705         startPosition = currentPosition;
 706         currentCharacter = source[currentPosition++];
 707         if (currentCharacter == '<') {
 708           if (getNextChar('?')) {
 709             currentCharacter = source[currentPosition++];
 710             if ((currentCharacter == ' ')
 711               || Character.isWhitespace(currentCharacter)) {
 712               // <?
 713               startPosition = currentPosition;
 714               phpMode = true;
 715             } else {
 716               boolean phpStart =
 717                 (currentCharacter == 'P') || (currentCharacter == 'p');
 718               if (phpStart) {
 719                 int test = getNextChar('H', 'h');
 720                 if (test >= 0) {
 721                   test = getNextChar('P', 'p');
 722                   if (test >= 0) {
 723                     // <?PHP  <?php
 724                     startPosition = currentPosition;
 725                     phpMode = true;
 726                   }
 727                 }
 728               }
 729             }
 730           }
 731         }
 732
 733         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 734           if (recordLineSeparator) {
 735             pushLineSeparator();
 736           } else {
 737             currentLine = null;
 738           }
 739         }
 740       }
 741     } //-----------------end switch while try--------------------
 742     catch (IndexOutOfBoundsException e) {
 743       return TokenNameEOF;
 744     }
 745
 746     if (phpMode) {
 747       this.wasAcr = false;
 748       if (diet) {
 749         jumpOverMethodBody();
 750         diet = false;
 751         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 752       }
 753       try {
 754         while (true) { //loop for jumping over comments
 755           withoutUnicodePtr = 0;
 756           //start with a new token (even comment written with unicode )
 757
 758           // ---------Consume white space and handles startPosition---------
 759           int whiteStart = currentPosition;
 760           boolean isWhiteSpace;
 761           do {
 762             startPosition = currentPosition;
 763             if (((currentCharacter = source[currentPosition++]) == '\\')
 764               && (source[currentPosition] == 'u')) {
 765               isWhiteSpace = jumpOverUnicodeWhiteSpace();
 766             } else {
 767               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 768                 checkNonExternalizeString();
 769                 if (recordLineSeparator) {
 770                   pushLineSeparator();
 771                 } else {
 772                   currentLine = null;
 773                 }
 774               }
 775               isWhiteSpace =
 776                 (currentCharacter == ' ')
 777                   || Character.isWhitespace(currentCharacter);
 778             }
 779           } while (isWhiteSpace);
 780           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 781             // reposition scanner in case we are interested by spaces as tokens
 782             currentPosition--;
 783             startPosition = whiteStart;
 784             return TokenNameWHITESPACE;
 785           }
 786           //little trick to get out in the middle of a source compuation
 787           if (currentPosition > eofPosition)
 788             return TokenNameEOF;
 789
 790           // ---------Identify the next token-------------
 791
 792           switch (currentCharacter) {
 793             case '(' :
 794               return TokenNameLPAREN;
 795             case ')' :
 796               return TokenNameRPAREN;
 797             case '{' :
 798               return TokenNameLBRACE;
 799             case '}' :
 800               return TokenNameRBRACE;
 801             case '[' :
 802               return TokenNameLBRACKET;
 803             case ']' :
 804               return TokenNameRBRACKET;
 805             case ';' :
 806               return TokenNameSEMICOLON;
 807             case ',' :
 808               return TokenNameCOMMA;
 809
 810             case '.' :
 811               if (getNextCharAsDigit())
 812                 return scanNumber(true);
 813               return TokenNameDOT;
 814             case '+' :
 815               {
 816                 int test;
 817                 if ((test = getNextChar('+', '=')) == 0)
 818                   return TokenNamePLUS_PLUS;
 819                 if (test > 0)
 820                   return TokenNamePLUS_EQUAL;
 821                 return TokenNamePLUS;
 822               }
 823             case '-' :
 824               {
 825                 int test;
 826                 if ((test = getNextChar('-', '=')) == 0)
 827                   return TokenNameMINUS_MINUS;
 828                 if (test > 0)
 829                   return TokenNameMINUS_EQUAL;
 830                 if (getNextChar('>'))
 831                   return TokenNameMINUS_GREATER;
 832
 833                 return TokenNameMINUS;
 834               }
 835             case '~' :
 836               if (getNextChar('='))
 837                 return TokenNameTWIDDLE_EQUAL;
 838               return TokenNameTWIDDLE;
 839             case '!' :
 840               if (getNextChar('='))
 841                 return TokenNameNOT_EQUAL;
 842               return TokenNameNOT;
 843             case '*' :
 844               if (getNextChar('='))
 845                 return TokenNameMULTIPLY_EQUAL;
 846               return TokenNameMULTIPLY;
 847             case '%' :
 848               if (getNextChar('='))
 849                 return TokenNameREMAINDER_EQUAL;
 850               return TokenNameREMAINDER;
 851             case '<' :
 852               {
 853                 int test;
 854                 if ((test = getNextChar('=', '<')) == 0)
 855                   return TokenNameLESS_EQUAL;
 856                 if (test > 0) {
 857                   if (getNextChar('='))
 858                     return TokenNameLEFT_SHIFT_EQUAL;
 859                   if (getNextChar('<')) {
 860                     int heredocStart = currentPosition;
 861                     int heredocLength = 0;
 862                     currentCharacter = source[currentPosition++];
 863                     if (Character.isJavaIdentifierStart(currentCharacter)) {
 864                       currentCharacter = source[currentPosition++];
 865                     } else {
 866                       return TokenNameERROR;
 867                     }
 868                     while (Character.isJavaIdentifierPart(currentCharacter)) {
 869                       currentCharacter = source[currentPosition++];
 870                     }
 871
 872                     heredocLength = currentPosition - heredocStart - 1;
 873
 874                     // heredoc end-tag determination
 875                     boolean endTag = true;
 876                     char ch;
 877                     do {
 878                       ch = source[currentPosition++];
 879                       if (ch == '\r' || ch == '\n') {
 880                         if (recordLineSeparator) {
 881                           pushLineSeparator();
 882                         } else {
 883                           currentLine = null;
 884                         }
 885                         for (int i = 0; i < heredocLength; i++) {
 886                           if (source[currentPosition + i]
 887                             != source[heredocStart + i]) {
 888                             endTag = false;
 889                             break;
 890                           }
 891                         }
 892                         if (endTag) {
 893                           currentPosition += heredocLength - 1;
 894                           currentCharacter = source[currentPosition++];
 895                           break; // do...while loop
 896                         } else {
 897                           endTag = true;
 898                         }
 899                       }
 900
 901                     } while (true);
 902
 903                     return TokenNameHEREDOC;
 904                   }
 905                   return TokenNameLEFT_SHIFT;
 906                 }
 907                 return TokenNameLESS;
 908               }
 909             case '>' :
 910               {
 911                 int test;
 912                 if ((test = getNextChar('=', '>')) == 0)
 913                   return TokenNameGREATER_EQUAL;
 914                 if (test > 0) {
 915                   if ((test = getNextChar('=', '>')) == 0)
 916                     return TokenNameRIGHT_SHIFT_EQUAL;
 917                   return TokenNameRIGHT_SHIFT;
 918                 }
 919                 return TokenNameGREATER;
 920               }
 921             case '=' :
 922               if (getNextChar('='))
 923                 return TokenNameEQUAL_EQUAL;
 924               if (getNextChar('>'))
 925                 return TokenNameEQUAL_GREATER;
 926               return TokenNameEQUAL;
 927             case '&' :
 928               {
 929                 int test;
 930                 if ((test = getNextChar('&', '=')) == 0)
 931                   return TokenNameAND_AND;
 932                 if (test > 0)
 933                   return TokenNameAND_EQUAL;
 934                 return TokenNameAND;
 935               }
 936             case '|' :
 937               {
 938                 int test;
 939                 if ((test = getNextChar('|', '=')) == 0)
 940                   return TokenNameOR_OR;
 941                 if (test > 0)
 942                   return TokenNameOR_EQUAL;
 943                 return TokenNameOR;
 944               }
 945             case '^' :
 946               if (getNextChar('='))
 947                 return TokenNameXOR_EQUAL;
 948               return TokenNameXOR;
 949             case '?' :
 950               if (getNextChar('>')) {
 951                 phpMode = false;
 952                 return TokenNameStopPHP;
 953               }
 954               return TokenNameQUESTION;
 955             case ':' :
 956               if (getNextChar(':'))
 957                 return TokenNameCOLON_COLON;
 958               return TokenNameCOLON;
 959             case '@' :
 960               return TokenNameAT;
 961               //                                        case '\'' :
 962               //                                                {
 963               //                                                        int test;
 964               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 965               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 966               //                                                        }
 967               //                                                        if (test > 0) {
 968               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 969               //                                                                for (int lookAhead = 0;
 970               //                                                                        lookAhead < 3;
 971               //                                                                        lookAhead++) {
 972               //                                                                        if (currentPosition + lookAhead
 973               //                                                                                == source.length)
 974               //                                                                                break;
 975               //                                                                        if (source[currentPosition + lookAhead]
 976               //                                                                                == '\n')
 977               //                                                                                break;
 978               //                                                                        if (source[currentPosition + lookAhead]
 979               //                                                                                == '\'') {
 980               //                                                                                currentPosition += lookAhead + 1;
 981               //                                                                                break;
 982               //                                                                        }
 983               //                                                                }
 984               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 985               //                                                        }
 986               //                                                }
 987               //                                                if (getNextChar('\'')) {
 988               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 989               //                                                        for (int lookAhead = 0;
 990               //                                                                lookAhead < 3;
 991               //                                                                lookAhead++) {
 992               //                                                                if (currentPosition + lookAhead
 993               //                                                                        == source.length)
 994               //                                                                        break;
 995               //                                                                if (source[currentPosition + lookAhead]
 996               //                                                                        == '\n')
 997               //                                                                        break;
 998               //                                                                if (source[currentPosition + lookAhead]
 999               //                                                                        == '\'') {
1000               //                                                                        currentPosition += lookAhead + 1;
1001               //                                                                        break;
1002               //                                                                }
1003               //                                                        }
1004               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1005               //                                                }
1006               //                                                if (getNextChar('\\'))
1007               //                                                        scanEscapeCharacter();
1008               //                                                else { // consume next character
1009               //                                                        unicodeAsBackSlash = false;
1010               //                                                        if (((currentCharacter = source[currentPosition++])
1011               //                                                                == '\\')
1012               //                                                                && (source[currentPosition] == 'u')) {
1013               //                                                                getNextUnicodeChar();
1014               //                                                        } else {
1015               //                                                                if (withoutUnicodePtr != 0) {
1016               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1017               //                                                                                currentCharacter;
1018               //                                                                }
1019               //                                                        }
1020               //                                                }
1021               //                                                //            if (getNextChar('\''))
1022               //                                                //              return TokenNameCharacterLiteral;
1023               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1024               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1025               //                                                        if (currentPosition + lookAhead == source.length)
1026               //                                                                break;
1027               //                                                        if (source[currentPosition + lookAhead] == '\n')
1028               //                                                                break;
1029               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1030               //                                                                currentPosition += lookAhead + 1;
1031               //                                                                break;
1032               //                                                        }
1033               //                                                }
1034               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1035             case '\'' :
1036               try {
1037                 // consume next character
1038                 unicodeAsBackSlash = false;
1039                 if (((currentCharacter = source[currentPosition++]) == '\\')
1040                   && (source[currentPosition] == 'u')) {
1041                   getNextUnicodeChar();
1042                 } else {
1043                   if (withoutUnicodePtr != 0) {
1044                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1045                       currentCharacter;
1046                   }
1047                 }
1048
1049                 while (currentCharacter != '\'') {
1050
1051                   /**** in PHP \r and \n are valid in string literals ****/
1052 //                  if ((currentCharacter == '\n')
1053 //                    || (currentCharacter == '\r')) {
1054 //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1055 //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1056 //                      if (currentPosition + lookAhead == source.length)
1057 //                        break;
1058 //                      if (source[currentPosition + lookAhead] == '\n')
1059 //                        break;
1060 //                      if (source[currentPosition + lookAhead] == '\"') {
1061 //                        currentPosition += lookAhead + 1;
1062 //                        break;
1063 //                      }
1064 //                    }
1065 //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1066 //                  }
1067                   if (currentCharacter == '\\') {
1068                     int escapeSize = currentPosition;
1069                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1070                     //scanEscapeCharacter has a side effect on this value and we need the previous value a few lines below
1071                     scanEscapeCharacter();
1072                     escapeSize = currentPosition - escapeSize;
1073                     if (withoutUnicodePtr == 0) {
1074                       //buffer all the entries that have been left aside....
1075                       withoutUnicodePtr =
1076                         currentPosition - escapeSize - 1 - startPosition;
1077                       System.arraycopy(
1078                         source,
1079                         startPosition,
1080                         withoutUnicodeBuffer,
1081                         1,
1082                         withoutUnicodePtr);
1083                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1084                         currentCharacter;
1085                     } else { //overwrite the / in the buffer
1086                       withoutUnicodeBuffer[withoutUnicodePtr] =
1087                         currentCharacter;
1088                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1089                         withoutUnicodePtr--;
1090                       }
1091                     }
1092                   }
1093                   // consume next character
1094                   unicodeAsBackSlash = false;
1095                   if (((currentCharacter = source[currentPosition++]) == '\\')
1096                     && (source[currentPosition] == 'u')) {
1097                     getNextUnicodeChar();
1098                   } else {
1099                     if (withoutUnicodePtr != 0) {
1100                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1101                         currentCharacter;
1102                     }
1103                   }
1104
1105                 }
1106               } catch (IndexOutOfBoundsException e) {
1107                 throw new InvalidInputException(UNTERMINATED_STRING);
1108               } catch (InvalidInputException e) {
1109                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1110                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1111                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1112                     if (currentPosition + lookAhead == source.length)
1113                       break;
1114                     if (source[currentPosition + lookAhead] == '\n')
1115                       break;
1116                     if (source[currentPosition + lookAhead] == '\'') {
1117                       currentPosition += lookAhead + 1;
1118                       break;
1119                     }
1120                   }
1121
1122                 }
1123                 throw e; // rethrow
1124               }
1125               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1126                 if (currentLine == null) {
1127                   currentLine = new NLSLine();
1128                   lines.add(currentLine);
1129                 }
1130                 currentLine.add(
1131                   new StringLiteral(
1132                     getCurrentTokenSourceString(),
1133                     startPosition,
1134                     currentPosition - 1));
1135               }
1136               return TokenNameStringConstant;
1137             case '"' :
1138               try {
1139                 // consume next character
1140                 unicodeAsBackSlash = false;
1141                 if (((currentCharacter = source[currentPosition++]) == '\\')
1142                   && (source[currentPosition] == 'u')) {
1143                   getNextUnicodeChar();
1144                 } else {
1145                   if (withoutUnicodePtr != 0) {
1146                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1147                       currentCharacter;
1148                   }
1149                 }
1150
1151                 while (currentCharacter != '"') {
1152
1153
1154                   /**** in PHP \r and \n are valid in string literals ****/
1155 //                  if ((currentCharacter == '\n')
1156 //                    || (currentCharacter == '\r')) {
1157 //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1158 //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1159 //                      if (currentPosition + lookAhead == source.length)
1160 //                        break;
1161 //                      if (source[currentPosition + lookAhead] == '\n')
1162 //                        break;
1163 //                      if (source[currentPosition + lookAhead] == '\"') {
1164 //                        currentPosition += lookAhead + 1;
1165 //                        break;
1166 //                      }
1167 //                    }
1168 //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1169 //                  }
1170                   if (currentCharacter == '\\') {
1171                     int escapeSize = currentPosition;
1172                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1173                     //scanEscapeCharacter has a side effect on this value and we need the previous value a few lines below
1174                     scanEscapeCharacter();
1175                     escapeSize = currentPosition - escapeSize;
1176                     if (withoutUnicodePtr == 0) {
1177                       //buffer all the entries that have been left aside....
1178                       withoutUnicodePtr =
1179                         currentPosition - escapeSize - 1 - startPosition;
1180                       System.arraycopy(
1181                         source,
1182                         startPosition,
1183                         withoutUnicodeBuffer,
1184                         1,
1185                         withoutUnicodePtr);
1186                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1187                         currentCharacter;
1188                     } else { //overwrite the / in the buffer
1189                       withoutUnicodeBuffer[withoutUnicodePtr] =
1190                         currentCharacter;
1191                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1192                         withoutUnicodePtr--;
1193                       }
1194                     }
1195                   }
1196                   // consume next character
1197                   unicodeAsBackSlash = false;
1198                   if (((currentCharacter = source[currentPosition++]) == '\\')
1199                     && (source[currentPosition] == 'u')) {
1200                     getNextUnicodeChar();
1201                   } else {
1202                     if (withoutUnicodePtr != 0) {
1203                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1204                         currentCharacter;
1205                     }
1206                   }
1207
1208                 }
1209               } catch (IndexOutOfBoundsException e) {
1210                 throw new InvalidInputException(UNTERMINATED_STRING);
1211               } catch (InvalidInputException e) {
1212                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1213                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1214                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1215                     if (currentPosition + lookAhead == source.length)
1216                       break;
1217                     if (source[currentPosition + lookAhead] == '\n')
1218                       break;
1219                     if (source[currentPosition + lookAhead] == '\"') {
1220                       currentPosition += lookAhead + 1;
1221                       break;
1222                     }
1223                   }
1224
1225                 }
1226                 throw e; // rethrow
1227               }
1228               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1229                 if (currentLine == null) {
1230                   currentLine = new NLSLine();
1231                   lines.add(currentLine);
1232                 }
1233                 currentLine.add(
1234                   new StringLiteral(
1235                     getCurrentTokenSourceString(),
1236                     startPosition,
1237                     currentPosition - 1));
1238               }
1239               return TokenNameStringLiteral;
1240             case '`' :
1241             try {
1242               // consume next character
1243               unicodeAsBackSlash = false;
1244               if (((currentCharacter = source[currentPosition++]) == '\\')
1245                 && (source[currentPosition] == 'u')) {
1246                 getNextUnicodeChar();
1247               } else {
1248                 if (withoutUnicodePtr != 0) {
1249                   withoutUnicodeBuffer[++withoutUnicodePtr] =
1250                     currentCharacter;
1251                 }
1252               }
1253
1254               while (currentCharacter != '`') {
1255
1256
1257                 /**** in PHP \r and \n are valid in string literals ****/
1258 //                if ((currentCharacter == '\n')
1259 //                  || (currentCharacter == '\r')) {
1260 //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1261 //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1262 //                    if (currentPosition + lookAhead == source.length)
1263 //                      break;
1264 //                    if (source[currentPosition + lookAhead] == '\n')
1265 //                      break;
1266 //                    if (source[currentPosition + lookAhead] == '\"') {
1267 //                      currentPosition += lookAhead + 1;
1268 //                      break;
1269 //                    }
1270 //                  }
1271 //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1272 //                }
1273                 if (currentCharacter == '\\') {
1274                   int escapeSize = currentPosition;
1275                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1276                   //scanEscapeCharacter has a side effect on this value and we need the previous value a few lines below
1277                   scanEscapeCharacter();
1278                   escapeSize = currentPosition - escapeSize;
1279                   if (withoutUnicodePtr == 0) {
1280                     //buffer all the entries that have been left aside....
1281                     withoutUnicodePtr =
1282                       currentPosition - escapeSize - 1 - startPosition;
1283                     System.arraycopy(
1284                       source,
1285                       startPosition,
1286                       withoutUnicodeBuffer,
1287                       1,
1288                       withoutUnicodePtr);
1289                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1290                       currentCharacter;
1291                   } else { //overwrite the / in the buffer
1292                     withoutUnicodeBuffer[withoutUnicodePtr] =
1293                       currentCharacter;
1294                     if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1295                       withoutUnicodePtr--;
1296                     }
1297                   }
1298                 }
1299                 // consume next character
1300                 unicodeAsBackSlash = false;
1301                 if (((currentCharacter = source[currentPosition++]) == '\\')
1302                   && (source[currentPosition] == 'u')) {
1303                   getNextUnicodeChar();
1304                 } else {
1305                   if (withoutUnicodePtr != 0) {
1306                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1307                       currentCharacter;
1308                   }
1309                 }
1310
1311               }
1312             } catch (IndexOutOfBoundsException e) {
1313               throw new InvalidInputException(UNTERMINATED_STRING);
1314             } catch (InvalidInputException e) {
1315               if (e.getMessage().equals(INVALID_ESCAPE)) {
1316                 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1317                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1318                   if (currentPosition + lookAhead == source.length)
1319                     break;
1320                   if (source[currentPosition + lookAhead] == '\n')
1321                     break;
1322                   if (source[currentPosition + lookAhead] == '`') {
1323                     currentPosition += lookAhead + 1;
1324                     break;
1325                   }
1326                 }
1327
1328               }
1329               throw e; // rethrow
1330             }
1331             if (checkNonExternalizedStringLiterals) { // check for presence of  NLS tags //$NON-NLS-?$ where ? is an int.
1332               if (currentLine == null) {
1333                 currentLine = new NLSLine();
1334                 lines.add(currentLine);
1335               }
1336               currentLine.add(
1337                 new StringLiteral(
1338                   getCurrentTokenSourceString(),
1339                   startPosition,
1340                   currentPosition - 1));
1341             }
1342             return TokenNameStringInterpolated;
1343             case '#' :
1344             case '/' :
1345               {
1346                 int test;
1347                 if ((currentCharacter == '#')
1348                   || (test = getNextChar('/', '*')) == 0) {
1349                   //line comment
1350                   int endPositionForLineComment = 0;
1351                   try { //get the next char
1352                     if (((currentCharacter = source[currentPosition++])
1353                       == '\\')
1354                       && (source[currentPosition] == 'u')) {
1355                       //-------------unicode processing ------------
1356                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1357                       currentPosition++;
1358                       while (source[currentPosition] == 'u') {
1359                         currentPosition++;
1360                       }
1361                       if ((c1 =
1362                         Character.getNumericValue(source[currentPosition++]))
1363                         > 15
1364                         || c1 < 0
1365                         || (c2 =
1366                           Character.getNumericValue(source[currentPosition++]))
1367                           > 15
1368                         || c2 < 0
1369                         || (c3 =
1370                           Character.getNumericValue(source[currentPosition++]))
1371                           > 15
1372                         || c3 < 0
1373                         || (c4 =
1374                           Character.getNumericValue(source[currentPosition++]))
1375                           > 15
1376                         || c4 < 0) {
1377                         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1378                       } else {
1379                         currentCharacter =
1380                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1381                       }
1382                     }
1383
1384                     //handle the \\u case manually into comment
1385                     if (currentCharacter == '\\') {
1386                       if (source[currentPosition] == '\\')
1387                         currentPosition++;
1388                     } //jump over the \\
1389                     boolean isUnicode = false;
1390                     while (currentCharacter != '\r'
1391                       && currentCharacter != '\n') {
1392                       if (currentCharacter == '?') {
1393                         if (getNextChar('>')) {
1394                           startPosition = currentPosition - 2;
1395                           phpMode = false;
1396                           return TokenNameStopPHP;
1397                         }
1398                       }
1399
1400                       //get the next char
1401                       isUnicode = false;
1402                       if (((currentCharacter = source[currentPosition++])
1403                         == '\\')
1404                         && (source[currentPosition] == 'u')) {
1405                         isUnicode = true;
1406                         //-------------unicode traitement ------------
1407                         int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1408                         currentPosition++;
1409                         while (source[currentPosition] == 'u') {
1410                           currentPosition++;
1411                         }
1412                         if ((c1 =
1413                           Character.getNumericValue(source[currentPosition++]))
1414                           > 15
1415                           || c1 < 0
1416                           || (c2 =
1417                             Character.getNumericValue(
1418                               source[currentPosition++]))
1419                             > 15
1420                           || c2 < 0
1421                           || (c3 =
1422                             Character.getNumericValue(
1423                               source[currentPosition++]))
1424                             > 15
1425                           || c3 < 0
1426                           || (c4 =
1427                             Character.getNumericValue(
1428                               source[currentPosition++]))
1429                             > 15
1430                           || c4 < 0) {
1431                           throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1432                         } else {
1433                           currentCharacter =
1434                             (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1435                         }
1436                       }
1437                       //handle the \\u case manually into comment
1438                       if (currentCharacter == '\\') {
1439                         if (source[currentPosition] == '\\')
1440                           currentPosition++;
1441                       } //jump over the \\
1442                     }
1443                     if (isUnicode) {
1444                       endPositionForLineComment = currentPosition - 6;
1445                     } else {
1446                       endPositionForLineComment = currentPosition - 1;
1447                     }
1448                     recordComment(false);
1449                     if ((currentCharacter == '\r')
1450                       || (currentCharacter == '\n')) {
1451                       checkNonExternalizeString();
1452                       if (recordLineSeparator) {
1453                         if (isUnicode) {
1454                           pushUnicodeLineSeparator();
1455                         } else {
1456                           pushLineSeparator();
1457                         }
1458                       } else {
1459                         currentLine = null;
1460                       }
1461                     }
1462                     if (tokenizeComments) {
1463                       if (!isUnicode) {
1464                         currentPosition = endPositionForLineComment;
1465                         // reset one character behind
1466                       }
1467                       return TokenNameCOMMENT_LINE;
1468                     }
1469                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1470                     if (tokenizeComments) {
1471                       currentPosition--;
1472                       // reset one character behind
1473                       return TokenNameCOMMENT_LINE;
1474                     }
1475                   }
1476                   break;
1477                 }
1478                 if (test > 0) {
1479                   //traditional and annotation comment
1480                   boolean isJavadoc = false, star = false;
1481                   // consume next character
1482                   unicodeAsBackSlash = false;
1483                   if (((currentCharacter = source[currentPosition++]) == '\\')
1484                     && (source[currentPosition] == 'u')) {
1485                     getNextUnicodeChar();
1486                   } else {
1487                     if (withoutUnicodePtr != 0) {
1488                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1489                         currentCharacter;
1490                     }
1491                   }
1492
1493                   if (currentCharacter == '*') {
1494                     isJavadoc = true;
1495                     star = true;
1496                   }
1497                   if ((currentCharacter == '\r')
1498                     || (currentCharacter == '\n')) {
1499                     checkNonExternalizeString();
1500                     if (recordLineSeparator) {
1501                       pushLineSeparator();
1502                     } else {
1503                       currentLine = null;
1504                     }
1505                   }
1506                   try { //get the next char
1507                     if (((currentCharacter = source[currentPosition++])
1508                       == '\\')
1509                       && (source[currentPosition] == 'u')) {
1510                       //-------------unicode traitement ------------
1511                       getNextUnicodeChar();
1512                     }
1513                     //handle the \\u case manually into comment
1514                     if (currentCharacter == '\\') {
1515                       if (source[currentPosition] == '\\')
1516                         currentPosition++;
1517                       //jump over the \\
1518                     }
1519                     // empty comment is not a javadoc /**/
1520                     if (currentCharacter == '/') {
1521                       isJavadoc = false;
1522                     }
1523                     //loop until end of comment */
1524                     while ((currentCharacter != '/') || (!star)) {
1525                       if ((currentCharacter == '\r')
1526                         || (currentCharacter == '\n')) {
1527                         checkNonExternalizeString();
1528                         if (recordLineSeparator) {
1529                           pushLineSeparator();
1530                         } else {
1531                           currentLine = null;
1532                         }
1533                       }
1534                       star = currentCharacter == '*';
1535                       //get next char
1536                       if (((currentCharacter = source[currentPosition++])
1537                         == '\\')
1538                         && (source[currentPosition] == 'u')) {
1539                         //-------------unicode traitement ------------
1540                         getNextUnicodeChar();
1541                       }
1542                       //handle the \\u case manually into comment
1543                       if (currentCharacter == '\\') {
1544                         if (source[currentPosition] == '\\')
1545                           currentPosition++;
1546                       } //jump over the \\
1547                     }
1548                     recordComment(isJavadoc);
1549                     if (tokenizeComments) {
1550                       if (isJavadoc)
1551                         return TokenNameCOMMENT_PHPDOC;
1552                       return TokenNameCOMMENT_BLOCK;
1553                     }
1554                   } catch (IndexOutOfBoundsException e) {
1555                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1556                   }
1557                   break;
1558                 }
1559                 if (getNextChar('='))
1560                   return TokenNameDIVIDE_EQUAL;
1561                 return TokenNameDIVIDE;
1562               }
1563             case '\u001a' :
1564               if (atEnd())
1565                 return TokenNameEOF;
1566               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1567               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1568
1569             default :
1570               if (currentCharacter == '$') {
1571                 if (getNextChar('{'))
1572                   return TokenNameDOLLAR_LBRACE;
1573                 return scanIdentifierOrKeyword(true);
1574               }
1575               if (Character.isJavaIdentifierStart(currentCharacter))
1576                 return scanIdentifierOrKeyword(false);
1577               if (Character.isDigit(currentCharacter))
1578                 return scanNumber(false);
1579               return TokenNameERROR;
1580           }
1581         }
1582       } //-----------------end switch while try--------------------
1583       catch (IndexOutOfBoundsException e) {
1584       }
1585     }
1586     return TokenNameEOF;
1587   }
1588
1589   public final void getNextUnicodeChar()
1590     throws IndexOutOfBoundsException, InvalidInputException {
1591     //VOID
1592     //handle the case of unicode.
1593     //when a unicode appears then we must use a buffer that holds char internal values
1594     //At the end of this method currentCharacter holds the new visited char
1595     //and currentPosition points right next after it
1596
1597     //ALL getNextChar.... ARE OPTIMIZED COPIES
1598
1599     int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1600     currentPosition++;
1601     while (source[currentPosition] == 'u') {
1602       currentPosition++;
1603       unicodeSize++;
1604     }
1605
1606     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1607       || c1 < 0
1608       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1609       || c2 < 0
1610       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1611       || c3 < 0
1612       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1613       || c4 < 0) {
1614       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1615     } else {
1616       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1617       //need the unicode buffer
1618       if (withoutUnicodePtr == 0) {
1619         //buffer all the entries that have been left aside....
1620         withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1621         System.arraycopy(
1622           source,
1623           startPosition,
1624           withoutUnicodeBuffer,
1625           1,
1626           withoutUnicodePtr);
1627       }
1628       //fill the buffer with the char
1629       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1630     }
1631     unicodeAsBackSlash = currentCharacter == '\\';
1632   }
1633   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1634    */
1635   public final void jumpOverMethodBody() {
1636
1637     this.wasAcr = false;
1638     int found = 1;
1639     try {
1640       while (true) { //loop for jumping over comments
1641         // ---------Consume white space and handles startPosition---------
1642         boolean isWhiteSpace;
1643         do {
1644           startPosition = currentPosition;
1645           if (((currentCharacter = source[currentPosition++]) == '\\')
1646             && (source[currentPosition] == 'u')) {
1647             isWhiteSpace = jumpOverUnicodeWhiteSpace();
1648           } else {
1649             if (recordLineSeparator
1650               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1651               pushLineSeparator();
1652             isWhiteSpace = Character.isWhitespace(currentCharacter);
1653           }
1654         } while (isWhiteSpace);
1655
1656         // -------consume token until } is found---------
1657         switch (currentCharacter) {
1658           case '{' :
1659             found++;
1660             break;
1661           case '}' :
1662             found--;
1663             if (found == 0)
1664               return;
1665             break;
1666           case '\'' :
1667             {
1668               boolean test;
1669               test = getNextChar('\\');
1670               if (test) {
1671                 try {
1672                   scanEscapeCharacter();
1673                 } catch (InvalidInputException ex) {
1674                 };
1675               } else {
1676                 try { // consume next character
1677                   unicodeAsBackSlash = false;
1678                   if (((currentCharacter = source[currentPosition++]) == '\\')
1679                     && (source[currentPosition] == 'u')) {
1680                     getNextUnicodeChar();
1681                   } else {
1682                     if (withoutUnicodePtr != 0) {
1683                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1684                         currentCharacter;
1685                     }
1686                   }
1687                 } catch (InvalidInputException ex) {
1688                 };
1689               }
1690               getNextChar('\'');
1691               break;
1692             }
1693           case '"' :
1694             try {
1695               try { // consume next character
1696                 unicodeAsBackSlash = false;
1697                 if (((currentCharacter = source[currentPosition++]) == '\\')
1698                   && (source[currentPosition] == 'u')) {
1699                   getNextUnicodeChar();
1700                 } else {
1701                   if (withoutUnicodePtr != 0) {
1702                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1703                       currentCharacter;
1704                   }
1705                 }
1706               } catch (InvalidInputException ex) {
1707               };
1708               while (currentCharacter != '"') {
1709                 if (currentCharacter == '\r') {
1710                   if (source[currentPosition] == '\n')
1711                     currentPosition++;
1712                   break;
1713                   // the string cannot go further that the line
1714                 }
1715                 if (currentCharacter == '\n') {
1716                   break;
1717                   // the string cannot go further that the line
1718                 }
1719                 if (currentCharacter == '\\') {
1720                   try {
1721                     scanEscapeCharacter();
1722                   } catch (InvalidInputException ex) {
1723                   };
1724                 }
1725                 try { // consume next character
1726                   unicodeAsBackSlash = false;
1727                   if (((currentCharacter = source[currentPosition++]) == '\\')
1728                     && (source[currentPosition] == 'u')) {
1729                     getNextUnicodeChar();
1730                   } else {
1731                     if (withoutUnicodePtr != 0) {
1732                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1733                         currentCharacter;
1734                     }
1735                   }
1736                 } catch (InvalidInputException ex) {
1737                 };
1738               }
1739             } catch (IndexOutOfBoundsException e) {
1740               return;
1741             }
1742             break;
1743           case '/' :
1744             {
1745               int test;
1746               if ((test = getNextChar('/', '*')) == 0) {
1747                 //line comment
1748                 try {
1749                   //get the next char
1750                   if (((currentCharacter = source[currentPosition++]) == '\\')
1751                     && (source[currentPosition] == 'u')) {
1752                     //-------------unicode traitement ------------
1753                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1754                     currentPosition++;
1755                     while (source[currentPosition] == 'u') {
1756                       currentPosition++;
1757                     }
1758                     if ((c1 =
1759                       Character.getNumericValue(source[currentPosition++]))
1760                       > 15
1761                       || c1 < 0
1762                       || (c2 =
1763                         Character.getNumericValue(source[currentPosition++]))
1764                         > 15
1765                       || c2 < 0
1766                       || (c3 =
1767                         Character.getNumericValue(source[currentPosition++]))
1768                         > 15
1769                       || c3 < 0
1770                       || (c4 =
1771                         Character.getNumericValue(source[currentPosition++]))
1772                         > 15
1773                       || c4 < 0) {
1774                       //error don't care of the value
1775                       currentCharacter = 'A';
1776                     } //something different from \n and \r
1777                     else {
1778                       currentCharacter =
1779                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1780                     }
1781                   }
1782
1783                   while (currentCharacter != '\r'
1784                     && currentCharacter != '\n') {
1785                     //get the next char
1786                     if (((currentCharacter = source[currentPosition++])
1787                       == '\\')
1788                       && (source[currentPosition] == 'u')) {
1789                       //-------------unicode traitement ------------
1790                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1791                       currentPosition++;
1792                       while (source[currentPosition] == 'u') {
1793                         currentPosition++;
1794                       }
1795                       if ((c1 =
1796                         Character.getNumericValue(source[currentPosition++]))
1797                         > 15
1798                         || c1 < 0
1799                         || (c2 =
1800                           Character.getNumericValue(source[currentPosition++]))
1801                           > 15
1802                         || c2 < 0
1803                         || (c3 =
1804                           Character.getNumericValue(source[currentPosition++]))
1805                           > 15
1806                         || c3 < 0
1807                         || (c4 =
1808                           Character.getNumericValue(source[currentPosition++]))
1809                           > 15
1810                         || c4 < 0) {
1811                         //error don't care of the value
1812                         currentCharacter = 'A';
1813                       } //something different from \n and \r
1814                       else {
1815                         currentCharacter =
1816                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1817                       }
1818                     }
1819                   }
1820                   if (recordLineSeparator
1821                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1822                     pushLineSeparator();
1823                 } catch (IndexOutOfBoundsException e) {
1824                 } //an eof will them be generated
1825                 break;
1826               }
1827               if (test > 0) {
1828                 //traditional and annotation comment
1829                 boolean star = false;
1830                 try { // consume next character
1831                   unicodeAsBackSlash = false;
1832                   if (((currentCharacter = source[currentPosition++]) == '\\')
1833                     && (source[currentPosition] == 'u')) {
1834                     getNextUnicodeChar();
1835                   } else {
1836                     if (withoutUnicodePtr != 0) {
1837                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1838                         currentCharacter;
1839                     }
1840                   };
1841                 } catch (InvalidInputException ex) {
1842                 };
1843                 if (currentCharacter == '*') {
1844                   star = true;
1845                 }
1846                 if (recordLineSeparator
1847                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1848                   pushLineSeparator();
1849                 try { //get the next char
1850                   if (((currentCharacter = source[currentPosition++]) == '\\')
1851                     && (source[currentPosition] == 'u')) {
1852                     //-------------unicode traitement ------------
1853                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1854                     currentPosition++;
1855                     while (source[currentPosition] == 'u') {
1856                       currentPosition++;
1857                     }
1858                     if ((c1 =
1859                       Character.getNumericValue(source[currentPosition++]))
1860                       > 15
1861                       || c1 < 0
1862                       || (c2 =
1863                         Character.getNumericValue(source[currentPosition++]))
1864                         > 15
1865                       || c2 < 0
1866                       || (c3 =
1867                         Character.getNumericValue(source[currentPosition++]))
1868                         > 15
1869                       || c3 < 0
1870                       || (c4 =
1871                         Character.getNumericValue(source[currentPosition++]))
1872                         > 15
1873                       || c4 < 0) {
1874                       //error don't care of the value
1875                       currentCharacter = 'A';
1876                     } //something different from * and /
1877                     else {
1878                       currentCharacter =
1879                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1880                     }
1881                   }
1882                   //loop until end of comment */
1883                   while ((currentCharacter != '/') || (!star)) {
1884                     if (recordLineSeparator
1885                       && ((currentCharacter == '\r')
1886                         || (currentCharacter == '\n')))
1887                       pushLineSeparator();
1888                     star = currentCharacter == '*';
1889                     //get next char
1890                     if (((currentCharacter = source[currentPosition++])
1891                       == '\\')
1892                       && (source[currentPosition] == 'u')) {
1893                       //-------------unicode traitement ------------
1894                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1895                       currentPosition++;
1896                       while (source[currentPosition] == 'u') {
1897                         currentPosition++;
1898                       }
1899                       if ((c1 =
1900                         Character.getNumericValue(source[currentPosition++]))
1901                         > 15
1902                         || c1 < 0
1903                         || (c2 =
1904                           Character.getNumericValue(source[currentPosition++]))
1905                           > 15
1906                         || c2 < 0
1907                         || (c3 =
1908                           Character.getNumericValue(source[currentPosition++]))
1909                           > 15
1910                         || c3 < 0
1911                         || (c4 =
1912                           Character.getNumericValue(source[currentPosition++]))
1913                           > 15
1914                         || c4 < 0) {
1915                         //error don't care of the value
1916                         currentCharacter = 'A';
1917                       } //something different from * and /
1918                       else {
1919                         currentCharacter =
1920                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1921                       }
1922                     }
1923                   }
1924                 } catch (IndexOutOfBoundsException e) {
1925                   return;
1926                 }
1927                 break;
1928               }
1929               break;
1930             }
1931
1932           default :
1933             if (Character.isJavaIdentifierStart(currentCharacter)
1934               || currentCharacter == '$') {
1935               try {
1936                 scanIdentifierOrKeyword((currentCharacter == '$'));
1937               } catch (InvalidInputException ex) {
1938               };
1939               break;
1940             }
1941             if (Character.isDigit(currentCharacter)) {
1942               try {
1943                 scanNumber(false);
1944               } catch (InvalidInputException ex) {
1945               };
1946               break;
1947             }
1948         }
1949       }
1950       //-----------------end switch while try--------------------
1951     } catch (IndexOutOfBoundsException e) {
1952     } catch (InvalidInputException e) {
1953     }
1954     return;
1955   }
1956   public final boolean jumpOverUnicodeWhiteSpace()
1957     throws InvalidInputException {
1958     //BOOLEAN
1959     //handle the case of unicode. Jump over the next whiteSpace
1960     //making startPosition pointing on the next available char
1961     //On false, the currentCharacter is filled up with a potential
1962     //correct char
1963
1964     try {
1965       this.wasAcr = false;
1966       int c1, c2, c3, c4;
1967       int unicodeSize = 6;
1968       currentPosition++;
1969       while (source[currentPosition] == 'u') {
1970         currentPosition++;
1971         unicodeSize++;
1972       }
1973
1974       if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1975         || c1 < 0)
1976         || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1977           || c2 < 0)
1978         || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1979           || c3 < 0)
1980         || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1981           || c4 < 0)) {
1982         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1983       }
1984
1985       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1986       if (recordLineSeparator
1987         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1988         pushLineSeparator();
1989       if (Character.isWhitespace(currentCharacter))
1990         return true;
1991
1992       //buffer the new char which is not a white space
1993       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1994       //withoutUnicodePtr == 1 is true here
1995       return false;
1996     } catch (IndexOutOfBoundsException e) {
1997       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1998     }
1999   }
2000   public final int[] getLineEnds() {
2001     //return a bounded copy of this.lineEnds
2002
2003     int[] copy;
2004     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2005     return copy;
2006   }
2007
2008   public char[] getSource() {
2009     return this.source;
2010   }
2011   final char[] optimizedCurrentTokenSource1() {
2012     //return always the same char[] build only once
2013
2014     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2015     char charOne = source[startPosition];
2016     switch (charOne) {
2017       case 'a' :
2018         return charArray_a;
2019       case 'b' :
2020         return charArray_b;
2021       case 'c' :
2022         return charArray_c;
2023       case 'd' :
2024         return charArray_d;
2025       case 'e' :
2026         return charArray_e;
2027       case 'f' :
2028         return charArray_f;
2029       case 'g' :
2030         return charArray_g;
2031       case 'h' :
2032         return charArray_h;
2033       case 'i' :
2034         return charArray_i;
2035       case 'j' :
2036         return charArray_j;
2037       case 'k' :
2038         return charArray_k;
2039       case 'l' :
2040         return charArray_l;
2041       case 'm' :
2042         return charArray_m;
2043       case 'n' :
2044         return charArray_n;
2045       case 'o' :
2046         return charArray_o;
2047       case 'p' :
2048         return charArray_p;
2049       case 'q' :
2050         return charArray_q;
2051       case 'r' :
2052         return charArray_r;
2053       case 's' :
2054         return charArray_s;
2055       case 't' :
2056         return charArray_t;
2057       case 'u' :
2058         return charArray_u;
2059       case 'v' :
2060         return charArray_v;
2061       case 'w' :
2062         return charArray_w;
2063       case 'x' :
2064         return charArray_x;
2065       case 'y' :
2066         return charArray_y;
2067       case 'z' :
2068         return charArray_z;
2069       default :
2070         return new char[] { charOne };
2071     }
2072   }
2073   final char[] optimizedCurrentTokenSource2() {
2074     //try to return the same char[] build only once
2075
2076     char c0, c1;
2077     int hash =
2078       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2079         % TableSize;
2080     char[][] table = charArray_length[0][hash];
2081     int i = newEntry2;
2082     while (++i < InternalTableSize) {
2083       char[] charArray = table[i];
2084       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2085         return charArray;
2086     }
2087     //---------other side---------
2088     i = -1;
2089     int max = newEntry2;
2090     while (++i <= max) {
2091       char[] charArray = table[i];
2092       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2093         return charArray;
2094     }
2095     //--------add the entry-------
2096     if (++max >= InternalTableSize)
2097       max = 0;
2098     char[] r;
2099     table[max] = (r = new char[] { c0, c1 });
2100     newEntry2 = max;
2101     return r;
2102   }
2103   final char[] optimizedCurrentTokenSource3() {
2104     //try to return the same char[] build only once
2105
2106     char c0, c1, c2;
2107     int hash =
2108       (((c0 = source[startPosition]) << 12)
2109         + ((c1 = source[startPosition + 1]) << 6)
2110         + (c2 = source[startPosition + 2]))
2111         % TableSize;
2112     char[][] table = charArray_length[1][hash];
2113     int i = newEntry3;
2114     while (++i < InternalTableSize) {
2115       char[] charArray = table[i];
2116       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2117         return charArray;
2118     }
2119     //---------other side---------
2120     i = -1;
2121     int max = newEntry3;
2122     while (++i <= max) {
2123       char[] charArray = table[i];
2124       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2125         return charArray;
2126     }
2127     //--------add the entry-------
2128     if (++max >= InternalTableSize)
2129       max = 0;
2130     char[] r;
2131     table[max] = (r = new char[] { c0, c1, c2 });
2132     newEntry3 = max;
2133     return r;
2134   }
2135   final char[] optimizedCurrentTokenSource4() {
2136     //try to return the same char[] build only once
2137
2138     char c0, c1, c2, c3;
2139     long hash =
2140       ((((long) (c0 = source[startPosition])) << 18)
2141         + ((c1 = source[startPosition + 1]) << 12)
2142         + ((c2 = source[startPosition + 2]) << 6)
2143         + (c3 = source[startPosition + 3]))
2144         % TableSize;
2145     char[][] table = charArray_length[2][(int) hash];
2146     int i = newEntry4;
2147     while (++i < InternalTableSize) {
2148       char[] charArray = table[i];
2149       if ((c0 == charArray[0])
2150         && (c1 == charArray[1])
2151         && (c2 == charArray[2])
2152         && (c3 == charArray[3]))
2153         return charArray;
2154     }
2155     //---------other side---------
2156     i = -1;
2157     int max = newEntry4;
2158     while (++i <= max) {
2159       char[] charArray = table[i];
2160       if ((c0 == charArray[0])
2161         && (c1 == charArray[1])
2162         && (c2 == charArray[2])
2163         && (c3 == charArray[3]))
2164         return charArray;
2165     }
2166     //--------add the entry-------
2167     if (++max >= InternalTableSize)
2168       max = 0;
2169     char[] r;
2170     table[max] = (r = new char[] { c0, c1, c2, c3 });
2171     newEntry4 = max;
2172     return r;
2173
2174   }
2175   final char[] optimizedCurrentTokenSource5() {
2176     //try to return the same char[] build only once
2177
2178     char c0, c1, c2, c3, c4;
2179     long hash =
2180       ((((long) (c0 = source[startPosition])) << 24)
2181         + (((long) (c1 = source[startPosition + 1])) << 18)
2182         + ((c2 = source[startPosition + 2]) << 12)
2183         + ((c3 = source[startPosition + 3]) << 6)
2184         + (c4 = source[startPosition + 4]))
2185         % TableSize;
2186     char[][] table = charArray_length[3][(int) hash];
2187     int i = newEntry5;
2188     while (++i < InternalTableSize) {
2189       char[] charArray = table[i];
2190       if ((c0 == charArray[0])
2191         && (c1 == charArray[1])
2192         && (c2 == charArray[2])
2193         && (c3 == charArray[3])
2194         && (c4 == charArray[4]))
2195         return charArray;
2196     }
2197     //---------other side---------
2198     i = -1;
2199     int max = newEntry5;
2200     while (++i <= max) {
2201       char[] charArray = table[i];
2202       if ((c0 == charArray[0])
2203         && (c1 == charArray[1])
2204         && (c2 == charArray[2])
2205         && (c3 == charArray[3])
2206         && (c4 == charArray[4]))
2207         return charArray;
2208     }
2209     //--------add the entry-------
2210     if (++max >= InternalTableSize)
2211       max = 0;
2212     char[] r;
2213     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2214     newEntry5 = max;
2215     return r;
2216
2217   }
2218   final char[] optimizedCurrentTokenSource6() {
2219     //try to return the same char[] build only once
2220
2221     char c0, c1, c2, c3, c4, c5;
2222     long hash =
2223       ((((long) (c0 = source[startPosition])) << 32)
2224         + (((long) (c1 = source[startPosition + 1])) << 24)
2225         + (((long) (c2 = source[startPosition + 2])) << 18)
2226         + ((c3 = source[startPosition + 3]) << 12)
2227         + ((c4 = source[startPosition + 4]) << 6)
2228         + (c5 = source[startPosition + 5]))
2229         % TableSize;
2230     char[][] table = charArray_length[4][(int) hash];
2231     int i = newEntry6;
2232     while (++i < InternalTableSize) {
2233       char[] charArray = table[i];
2234       if ((c0 == charArray[0])
2235         && (c1 == charArray[1])
2236         && (c2 == charArray[2])
2237         && (c3 == charArray[3])
2238         && (c4 == charArray[4])
2239         && (c5 == charArray[5]))
2240         return charArray;
2241     }
2242     //---------other side---------
2243     i = -1;
2244     int max = newEntry6;
2245     while (++i <= max) {
2246       char[] charArray = table[i];
2247       if ((c0 == charArray[0])
2248         && (c1 == charArray[1])
2249         && (c2 == charArray[2])
2250         && (c3 == charArray[3])
2251         && (c4 == charArray[4])
2252         && (c5 == charArray[5]))
2253         return charArray;
2254     }
2255     //--------add the entry-------
2256     if (++max >= InternalTableSize)
2257       max = 0;
2258     char[] r;
2259     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2260     newEntry6 = max;
2261     return r;
2262   }
2263   public final void pushLineSeparator() throws InvalidInputException {
2264     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2265     final int INCREMENT = 250;
2266
2267     if (this.checkNonExternalizedStringLiterals) {
2268       // reinitialize the current line for non externalize strings purpose
2269       currentLine = null;
2270     }
2271     //currentCharacter is at position currentPosition-1
2272
2273     // cr 000D
2274     if (currentCharacter == '\r') {
2275       int separatorPos = currentPosition - 1;
2276       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2277         return;
2278       //System.out.println("CR-" + separatorPos);
2279       try {
2280         lineEnds[++linePtr] = separatorPos;
2281       } catch (IndexOutOfBoundsException e) {
2282         //linePtr value is correct
2283         int oldLength = lineEnds.length;
2284         int[] old = lineEnds;
2285         lineEnds = new int[oldLength + INCREMENT];
2286         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2287         lineEnds[linePtr] = separatorPos;
2288       }
2289       // look-ahead for merged cr+lf
2290       try {
2291         if (source[currentPosition] == '\n') {
2292           //System.out.println("look-ahead LF-" + currentPosition);
2293           lineEnds[linePtr] = currentPosition;
2294           currentPosition++;
2295           wasAcr = false;
2296         } else {
2297           wasAcr = true;
2298         }
2299       } catch (IndexOutOfBoundsException e) {
2300         wasAcr = true;
2301       }
2302     } else {
2303       // lf 000A
2304       if (currentCharacter == '\n') {
2305         //must merge eventual cr followed by lf
2306         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2307           //System.out.println("merge LF-" + (currentPosition - 1));
2308           lineEnds[linePtr] = currentPosition - 1;
2309         } else {
2310           int separatorPos = currentPosition - 1;
2311           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2312             return;
2313           // System.out.println("LF-" + separatorPos);
2314           try {
2315             lineEnds[++linePtr] = separatorPos;
2316           } catch (IndexOutOfBoundsException e) {
2317             //linePtr value is correct
2318             int oldLength = lineEnds.length;
2319             int[] old = lineEnds;
2320             lineEnds = new int[oldLength + INCREMENT];
2321             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2322             lineEnds[linePtr] = separatorPos;
2323           }
2324         }
2325         wasAcr = false;
2326       }
2327     }
2328   }
2329   public final void pushUnicodeLineSeparator() {
2330     // isUnicode means that the \r or \n has been read as a unicode character
2331
2332     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2333
2334     final int INCREMENT = 250;
2335     //currentCharacter is at position currentPosition-1
2336
2337     if (this.checkNonExternalizedStringLiterals) {
2338       // reinitialize the current line for non externalize strings purpose
2339       currentLine = null;
2340     }
2341
2342     // cr 000D
2343     if (currentCharacter == '\r') {
2344       int separatorPos = currentPosition - 6;
2345       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2346         return;
2347       //System.out.println("CR-" + separatorPos);
2348       try {
2349         lineEnds[++linePtr] = separatorPos;
2350       } catch (IndexOutOfBoundsException e) {
2351         //linePtr value is correct
2352         int oldLength = lineEnds.length;
2353         int[] old = lineEnds;
2354         lineEnds = new int[oldLength + INCREMENT];
2355         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2356         lineEnds[linePtr] = separatorPos;
2357       }
2358       // look-ahead for merged cr+lf
2359       if (source[currentPosition] == '\n') {
2360         //System.out.println("look-ahead LF-" + currentPosition);
2361         lineEnds[linePtr] = currentPosition;
2362         currentPosition++;
2363         wasAcr = false;
2364       } else {
2365         wasAcr = true;
2366       }
2367     } else {
2368       // lf 000A
2369       if (currentCharacter == '\n') {
2370         //must merge eventual cr followed by lf
2371         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2372           //System.out.println("merge LF-" + (currentPosition - 1));
2373           lineEnds[linePtr] = currentPosition - 6;
2374         } else {
2375           int separatorPos = currentPosition - 6;
2376           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2377             return;
2378           // System.out.println("LF-" + separatorPos);
2379           try {
2380             lineEnds[++linePtr] = separatorPos;
2381           } catch (IndexOutOfBoundsException e) {
2382             //linePtr value is correct
2383             int oldLength = lineEnds.length;
2384             int[] old = lineEnds;
2385             lineEnds = new int[oldLength + INCREMENT];
2386             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2387             lineEnds[linePtr] = separatorPos;
2388           }
2389         }
2390         wasAcr = false;
2391       }
2392     }
2393   }
2394   public final void recordComment(boolean isJavadoc) {
2395
2396     // a new annotation comment is recorded
2397     try {
2398       commentStops[++commentPtr] =
2399         isJavadoc ? currentPosition : -currentPosition;
2400     } catch (IndexOutOfBoundsException e) {
2401       int oldStackLength = commentStops.length;
2402       int[] oldStack = commentStops;
2403       commentStops = new int[oldStackLength + 30];
2404       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2405       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2406       //grows the positions buffers too
2407       int[] old = commentStarts;
2408       commentStarts = new int[oldStackLength + 30];
2409       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2410     }
2411
2412     //the buffer is of a correct size here
2413     commentStarts[commentPtr] = startPosition;
2414   }
2415   public void resetTo(int begin, int end) {
2416     //reset the scanner to a given position where it may rescan again
2417
2418     diet = false;
2419     initialPosition = startPosition = currentPosition = begin;
2420     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2421     commentPtr = -1; // reset comment stack
2422   }
2423
2424   public final void scanEscapeCharacter() throws InvalidInputException {
2425     // the string with "\\u" is a legal string of two chars \ and u
2426     //thus we use a direct access to the source (for regular cases).
2427
2428     if (unicodeAsBackSlash) {
2429       // consume next character
2430       unicodeAsBackSlash = false;
2431       if (((currentCharacter = source[currentPosition++]) == '\\')
2432         && (source[currentPosition] == 'u')) {
2433         getNextUnicodeChar();
2434       } else {
2435         if (withoutUnicodePtr != 0) {
2436           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2437         }
2438       }
2439     } else
2440       currentCharacter = source[currentPosition++];
2441     switch (currentCharacter) {
2442       case 'b' :
2443         currentCharacter = '\b';
2444         break;
2445       case 't' :
2446         currentCharacter = '\t';
2447         break;
2448       case 'n' :
2449         currentCharacter = '\n';
2450         break;
2451       case 'f' :
2452         currentCharacter = '\f';
2453         break;
2454       case 'r' :
2455         currentCharacter = '\r';
2456         break;
2457       case '\"' :
2458         currentCharacter = '\"';
2459         break;
2460       case '\'' :
2461         currentCharacter = '\'';
2462         break;
2463       case '\\' :
2464         currentCharacter = '\\';
2465         break;
2466       default :
2467         // -----------octal escape--------------
2468         // OctalDigit
2469         // OctalDigit OctalDigit
2470         // ZeroToThree OctalDigit OctalDigit
2471
2472         int number = Character.getNumericValue(currentCharacter);
2473         if (number >= 0 && number <= 7) {
2474           boolean zeroToThreeNot = number > 3;
2475           if (Character
2476             .isDigit(currentCharacter = source[currentPosition++])) {
2477             int digit = Character.getNumericValue(currentCharacter);
2478             if (digit >= 0 && digit <= 7) {
2479               number = (number * 8) + digit;
2480               if (Character
2481                 .isDigit(currentCharacter = source[currentPosition++])) {
2482                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2483                   currentPosition--;
2484                 } else {
2485                   digit = Character.getNumericValue(currentCharacter);
2486                   if (digit >= 0 && digit <= 7) {
2487                     // has read \ZeroToThree OctalDigit OctalDigit
2488                     number = (number * 8) + digit;
2489                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2490                     currentPosition--;
2491                   }
2492                 }
2493               } else { // has read \OctalDigit NonDigit--> ignore last character
2494                 currentPosition--;
2495               }
2496             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2497               currentPosition--;
2498             }
2499           } else { // has read \OctalDigit --> ignore last character
2500             currentPosition--;
2501           }
2502           if (number > 255)
2503             throw new InvalidInputException(INVALID_ESCAPE);
2504           currentCharacter = (char) number;
2505         }
2506         //else
2507         //     throw new InvalidInputException(INVALID_ESCAPE);
2508     }
2509   }
2510
2511   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2512   //    return scanIdentifierOrKeyword( false );
2513   //  }
2514
2515   public int scanIdentifierOrKeyword(boolean isVariable)
2516     throws InvalidInputException {
2517     //test keywords
2518
2519     //first dispatch on the first char.
2520     //then the length. If there are several
2521     //keywors with the same length AND the same first char, then do another
2522     //disptach on the second char :-)...cool....but fast !
2523
2524     useAssertAsAnIndentifier = false;
2525     while (getNextCharAsJavaIdentifierPart()) {
2526     };
2527
2528     if (isVariable) {
2529       return TokenNameVariable;
2530     }
2531     int index, length;
2532     char[] data;
2533     char firstLetter;
2534     if (withoutUnicodePtr == 0)
2535
2536       //quick test on length == 1 but not on length > 12 while most identifier
2537       //have a length which is <= 12...but there are lots of identifier with
2538       //only one char....
2539
2540       {
2541       if ((length = currentPosition - startPosition) == 1)
2542         return TokenNameIdentifier;
2543       //  data = source;
2544       data = new char[length];
2545       index = startPosition;
2546       for (int i = 0; i < length; i++) {
2547         data[i] = Character.toLowerCase(source[index + i]);
2548       }
2549       index = 0;
2550     } else {
2551       if ((length = withoutUnicodePtr) == 1)
2552         return TokenNameIdentifier;
2553       // data = withoutUnicodeBuffer;
2554       data = new char[withoutUnicodeBuffer.length];
2555       for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2556         data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2557       }
2558       index = 1;
2559     }
2560
2561     firstLetter = data[index];
2562     switch (firstLetter) {
2563
2564       case 'a' : // as and array
2565         switch (length) {
2566           case 2 : //as
2567             if ((data[++index] == 's')) {
2568               return TokenNameas;
2569             } else {
2570               return TokenNameIdentifier;
2571             }
2572           case 3 : //and
2573             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2574               return TokenNameAND;
2575             } else {
2576               return TokenNameIdentifier;
2577             }
2578             //          case 5 :
2579             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2580             //              return TokenNamearray;
2581             //            else
2582             //              return TokenNameIdentifier;
2583           default :
2584             return TokenNameIdentifier;
2585         }
2586       case 'b' : //break
2587         switch (length) {
2588           case 5 :
2589             if ((data[++index] == 'r')
2590               && (data[++index] == 'e')
2591               && (data[++index] == 'a')
2592               && (data[++index] == 'k'))
2593               return TokenNamebreak;
2594             else
2595               return TokenNameIdentifier;
2596           default :
2597             return TokenNameIdentifier;
2598         }
2599
2600       case 'c' : //case class continue
2601         switch (length) {
2602           case 4 :
2603             if ((data[++index] == 'a')
2604               && (data[++index] == 's')
2605               && (data[++index] == 'e'))
2606               return TokenNamecase;
2607             else
2608               return TokenNameIdentifier;
2609           case 5 :
2610             if ((data[++index] == 'l')
2611               && (data[++index] == 'a')
2612               && (data[++index] == 's')
2613               && (data[++index] == 's'))
2614               return TokenNameclass;
2615             else
2616               return TokenNameIdentifier;
2617           case 8 :
2618             if ((data[++index] == 'o')
2619               && (data[++index] == 'n')
2620               && (data[++index] == 't')
2621               && (data[++index] == 'i')
2622               && (data[++index] == 'n')
2623               && (data[++index] == 'u')
2624               && (data[++index] == 'e'))
2625               return TokenNamecontinue;
2626             else
2627               return TokenNameIdentifier;
2628           default :
2629             return TokenNameIdentifier;
2630         }
2631
2632       case 'd' : //define default do
2633         switch (length) {
2634           case 2 :
2635             if ((data[++index] == 'o'))
2636               return TokenNamedo;
2637             else
2638               return TokenNameIdentifier;
2639           case 6 :
2640             if ((data[++index] == 'e')
2641               && (data[++index] == 'f')
2642               && (data[++index] == 'i')
2643               && (data[++index] == 'n')
2644               && (data[++index] == 'e'))
2645               return TokenNamedefine;
2646             else
2647               return TokenNameIdentifier;
2648           case 7 :
2649             if ((data[++index] == 'e')
2650               && (data[++index] == 'f')
2651               && (data[++index] == 'a')
2652               && (data[++index] == 'u')
2653               && (data[++index] == 'l')
2654               && (data[++index] == 't'))
2655               return TokenNamedefault;
2656             else
2657               return TokenNameIdentifier;
2658           default :
2659             return TokenNameIdentifier;
2660         }
2661       case 'e' : //echo else elseif extends
2662         switch (length) {
2663           case 4 :
2664             if ((data[++index] == 'c')
2665               && (data[++index] == 'h')
2666               && (data[++index] == 'o'))
2667               return TokenNameecho;
2668             else if (
2669               (data[index] == 'l')
2670                 && (data[++index] == 's')
2671                 && (data[++index] == 'e'))
2672               return TokenNameelse;
2673             else
2674               return TokenNameIdentifier;
2675           case 5 : // endif
2676             if ((data[++index] == 'n')
2677               && (data[++index] == 'd')
2678               && (data[++index] == 'i')
2679               && (data[++index] == 'f'))
2680               return TokenNameendif;
2681             else
2682               return TokenNameIdentifier;
2683           case 6 : // endfor
2684             if ((data[++index] == 'n')
2685               && (data[++index] == 'd')
2686               && (data[++index] == 'f')
2687               && (data[++index] == 'o')
2688               && (data[++index] == 'r'))
2689               return TokenNameendfor;
2690             else if (
2691               (data[index] == 'l')
2692                 && (data[++index] == 's')
2693                 && (data[++index] == 'e')
2694                 && (data[++index] == 'i')
2695                 && (data[++index] == 'f'))
2696               return TokenNameelseif;
2697             else
2698               return TokenNameIdentifier;
2699           case 7 :
2700             if ((data[++index] == 'x')
2701               && (data[++index] == 't')
2702               && (data[++index] == 'e')
2703               && (data[++index] == 'n')
2704               && (data[++index] == 'd')
2705               && (data[++index] == 's'))
2706               return TokenNameextends;
2707             else
2708               return TokenNameIdentifier;
2709           case 8 : // endwhile
2710             if ((data[++index] == 'n')
2711               && (data[++index] == 'd')
2712               && (data[++index] == 'w')
2713               && (data[++index] == 'h')
2714               && (data[++index] == 'i')
2715               && (data[++index] == 'l')
2716               && (data[++index] == 'e'))
2717               return TokenNameendwhile;
2718             else
2719               return TokenNameIdentifier;
2720           case 9 : // endswitch
2721             if ((data[++index] == 'n')
2722               && (data[++index] == 'd')
2723               && (data[++index] == 's')
2724               && (data[++index] == 'w')
2725               && (data[++index] == 'i')
2726               && (data[++index] == 't')
2727               && (data[++index] == 'c')
2728               && (data[++index] == 'h'))
2729               return TokenNameendswitch;
2730             else
2731               return TokenNameIdentifier;
2732           case 10 : // endforeach
2733             if ((data[++index] == 'n')
2734               && (data[++index] == 'd')
2735               && (data[++index] == 'f')
2736               && (data[++index] == 'o')
2737               && (data[++index] == 'r')
2738               && (data[++index] == 'e')
2739               && (data[++index] == 'a')
2740               && (data[++index] == 'c')
2741               && (data[++index] == 'h'))
2742               return TokenNameendforeach;
2743             else
2744               return TokenNameIdentifier;
2745
2746           default :
2747             return TokenNameIdentifier;
2748         }
2749
2750       case 'f' : //for false function
2751         switch (length) {
2752           case 3 :
2753             if ((data[++index] == 'o') && (data[++index] == 'r'))
2754               return TokenNamefor;
2755             else
2756               return TokenNameIdentifier;
2757           case 5 :
2758             if ((data[++index] == 'a')
2759               && (data[++index] == 'l')
2760               && (data[++index] == 's')
2761               && (data[++index] == 'e'))
2762               return TokenNamefalse;
2763             else
2764               return TokenNameIdentifier;
2765           case 7 : // function
2766             if ((data[++index] == 'o')
2767               && (data[++index] == 'r')
2768               && (data[++index] == 'e')
2769               && (data[++index] == 'a')
2770               && (data[++index] == 'c')
2771               && (data[++index] == 'h'))
2772               return TokenNameforeach;
2773             else
2774               return TokenNameIdentifier;
2775           case 8 : // function
2776             if ((data[++index] == 'u')
2777               && (data[++index] == 'n')
2778               && (data[++index] == 'c')
2779               && (data[++index] == 't')
2780               && (data[++index] == 'i')
2781               && (data[++index] == 'o')
2782               && (data[++index] == 'n'))
2783               return TokenNamefunction;
2784             else
2785               return TokenNameIdentifier;
2786           default :
2787             return TokenNameIdentifier;
2788         }
2789       case 'g' : //global
2790         if (length == 6) {
2791           if ((data[++index] == 'l')
2792             && (data[++index] == 'o')
2793             && (data[++index] == 'b')
2794             && (data[++index] == 'a')
2795             && (data[++index] == 'l')) {
2796             return TokenNameglobal;
2797           }
2798         }
2799         return TokenNameIdentifier;
2800
2801       case 'i' : //if int
2802         switch (length) {
2803           case 2 :
2804             if (data[++index] == 'f')
2805               return TokenNameif;
2806             else
2807               return TokenNameIdentifier;
2808             //          case 3 :
2809             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2810             //              return TokenNameint;
2811             //            else
2812             //              return TokenNameIdentifier;
2813           case 7 :
2814             if ((data[++index] == 'n')
2815               && (data[++index] == 'c')
2816               && (data[++index] == 'l')
2817               && (data[++index] == 'u')
2818               && (data[++index] == 'd')
2819               && (data[++index] == 'e'))
2820               return TokenNameinclude;
2821             else
2822               return TokenNameIdentifier;
2823           case 12 :
2824             if ((data[++index] == 'n')
2825               && (data[++index] == 'c')
2826               && (data[++index] == 'l')
2827               && (data[++index] == 'u')
2828               && (data[++index] == 'd')
2829               && (data[++index] == 'e')
2830               && (data[++index] == '_')
2831               && (data[++index] == 'o')
2832               && (data[++index] == 'n')
2833               && (data[++index] == 'c')
2834               && (data[++index] == 'e'))
2835               return TokenNameinclude_once;
2836             else
2837               return TokenNameIdentifier;
2838           default :
2839             return TokenNameIdentifier;
2840         }
2841
2842       case 'l' : //list
2843         if (length == 4) {
2844           if ((data[++index] == 'i')
2845             && (data[++index] == 's')
2846             && (data[++index] == 't')) {
2847             return TokenNamelist;
2848           }
2849         }
2850         return TokenNameIdentifier;
2851
2852       case 'n' : // new null
2853         switch (length) {
2854           case 3 :
2855             if ((data[++index] == 'e') && (data[++index] == 'w'))
2856               return TokenNamenew;
2857             else
2858               return TokenNameIdentifier;
2859           case 4 :
2860             if ((data[++index] == 'u')
2861               && (data[++index] == 'l')
2862               && (data[++index] == 'l'))
2863               return TokenNamenull;
2864             else
2865               return TokenNameIdentifier;
2866
2867           default :
2868             return TokenNameIdentifier;
2869         }
2870       case 'o' : // or old_function
2871         if (length == 2) {
2872           if (data[++index] == 'r') {
2873             return TokenNameOR;
2874           }
2875         }
2876         //        if (length == 12) {
2877         //          if ((data[++index] == 'l')
2878         //            && (data[++index] == 'd')
2879         //            && (data[++index] == '_')
2880         //            && (data[++index] == 'f')
2881         //            && (data[++index] == 'u')
2882         //            && (data[++index] == 'n')
2883         //            && (data[++index] == 'c')
2884         //            && (data[++index] == 't')
2885         //            && (data[++index] == 'i')
2886         //            && (data[++index] == 'o')
2887         //            && (data[++index] == 'n')) {
2888         //            return TokenNameold_function;
2889         //          }
2890         //        }
2891         return TokenNameIdentifier;
2892
2893       case 'p' : // print
2894         if (length == 5) {
2895           if ((data[++index] == 'r')
2896             && (data[++index] == 'i')
2897             && (data[++index] == 'n')
2898             && (data[++index] == 't')) {
2899             return TokenNameprint;
2900           }
2901         }
2902         return TokenNameIdentifier;
2903       case 'r' : //return require require_once
2904         if (length == 6) {
2905           if ((data[++index] == 'e')
2906             && (data[++index] == 't')
2907             && (data[++index] == 'u')
2908             && (data[++index] == 'r')
2909             && (data[++index] == 'n')) {
2910             return TokenNamereturn;
2911           }
2912         } else if (length == 7) {
2913           if ((data[++index] == 'e')
2914             && (data[++index] == 'q')
2915             && (data[++index] == 'u')
2916             && (data[++index] == 'i')
2917             && (data[++index] == 'r')
2918             && (data[++index] == 'e')) {
2919             return TokenNamerequire;
2920           }
2921         } else if (length == 12) {
2922           if ((data[++index] == 'e')
2923             && (data[++index] == 'q')
2924             && (data[++index] == 'u')
2925             && (data[++index] == 'i')
2926             && (data[++index] == 'r')
2927             && (data[++index] == 'e')
2928             && (data[++index] == '_')
2929             && (data[++index] == 'o')
2930             && (data[++index] == 'n')
2931             && (data[++index] == 'c')
2932             && (data[++index] == 'e')) {
2933             return TokenNamerequire_once;
2934           }
2935         } else
2936           return TokenNameIdentifier;
2937
2938       case 's' : //static switch
2939         switch (length) {
2940           case 6 :
2941             if (data[++index] == 't')
2942               if ((data[++index] == 'a')
2943                 && (data[++index] == 't')
2944                 && (data[++index] == 'i')
2945                 && (data[++index] == 'c')) {
2946                 return TokenNamestatic;
2947               } else
2948                 return TokenNameIdentifier;
2949             else if (
2950               (data[index] == 'w')
2951                 && (data[++index] == 'i')
2952                 && (data[++index] == 't')
2953                 && (data[++index] == 'c')
2954                 && (data[++index] == 'h'))
2955               return TokenNameswitch;
2956             else
2957               return TokenNameIdentifier;
2958           default :
2959             return TokenNameIdentifier;
2960         }
2961
2962       case 't' : // true
2963         switch (length) {
2964
2965           case 4 :
2966             if ((data[++index] == 'r')
2967               && (data[++index] == 'u')
2968               && (data[++index] == 'e'))
2969               return TokenNametrue;
2970             else
2971               return TokenNameIdentifier;
2972             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2973             //              return TokenNamethis;
2974
2975           default :
2976             return TokenNameIdentifier;
2977         }
2978
2979       case 'v' : //var
2980         switch (length) {
2981           case 3 :
2982             if ((data[++index] == 'a') && (data[++index] == 'r'))
2983               return TokenNamevar;
2984             else
2985               return TokenNameIdentifier;
2986
2987           default :
2988             return TokenNameIdentifier;
2989         }
2990
2991       case 'w' : //while
2992         switch (length) {
2993           case 5 :
2994             if ((data[++index] == 'h')
2995               && (data[++index] == 'i')
2996               && (data[++index] == 'l')
2997               && (data[++index] == 'e'))
2998               return TokenNamewhile;
2999             else
3000               return TokenNameIdentifier;
3001             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3002             //return TokenNamewidefp ;
3003             //else
3004             //return TokenNameIdentifier;
3005           default :
3006             return TokenNameIdentifier;
3007         }
3008
3009       case 'x' : //xor
3010         switch (length) {
3011           case 3 :
3012             if ((data[++index] == 'o') && (data[++index] == 'r'))
3013               return TokenNameXOR;
3014             else
3015               return TokenNameIdentifier;
3016
3017           default :
3018             return TokenNameIdentifier;
3019         }
3020       default :
3021         return TokenNameIdentifier;
3022     }
3023   }
3024   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3025
3026     //when entering this method the currentCharacter is the firt
3027     //digit of the number , i.e. it may be preceeded by a . when
3028     //dotPrefix is true
3029
3030     boolean floating = dotPrefix;
3031     if ((!dotPrefix) && (currentCharacter == '0')) {
3032       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3033         //force the first char of the hexa number do exist...
3034         // consume next character
3035         unicodeAsBackSlash = false;
3036         if (((currentCharacter = source[currentPosition++]) == '\\')
3037           && (source[currentPosition] == 'u')) {
3038           getNextUnicodeChar();
3039         } else {
3040           if (withoutUnicodePtr != 0) {
3041             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3042           }
3043         }
3044         if (Character.digit(currentCharacter, 16) == -1)
3045           throw new InvalidInputException(INVALID_HEXA);
3046         //---end forcing--
3047         while (getNextCharAsDigit(16)) {
3048         };
3049         //        if (getNextChar('l', 'L') >= 0)
3050         //          return TokenNameLongLiteral;
3051         //        else
3052         return TokenNameIntegerLiteral;
3053       }
3054
3055       //there is x or X in the number
3056       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3057       if (getNextCharAsDigit()) {
3058         //-------------potential octal-----------------
3059         while (getNextCharAsDigit()) {
3060         };
3061
3062         //        if (getNextChar('l', 'L') >= 0) {
3063         //          return TokenNameLongLiteral;
3064         //        }
3065         //
3066         //        if (getNextChar('f', 'F') >= 0) {
3067         //          return TokenNameFloatingPointLiteral;
3068         //        }
3069
3070         if (getNextChar('d', 'D') >= 0) {
3071           return TokenNameDoubleLiteral;
3072         } else { //make the distinction between octal and float ....
3073           if (getNextChar('.')) { //bingo ! ....
3074             while (getNextCharAsDigit()) {
3075             };
3076             if (getNextChar('e', 'E') >= 0) {
3077               // consume next character
3078               unicodeAsBackSlash = false;
3079               if (((currentCharacter = source[currentPosition++]) == '\\')
3080                 && (source[currentPosition] == 'u')) {
3081                 getNextUnicodeChar();
3082               } else {
3083                 if (withoutUnicodePtr != 0) {
3084                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3085                 }
3086               }
3087
3088               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3089                 // consume next character
3090                 unicodeAsBackSlash = false;
3091                 if (((currentCharacter = source[currentPosition++]) == '\\')
3092                   && (source[currentPosition] == 'u')) {
3093                   getNextUnicodeChar();
3094                 } else {
3095                   if (withoutUnicodePtr != 0) {
3096                     withoutUnicodeBuffer[++withoutUnicodePtr] =
3097                       currentCharacter;
3098                   }
3099                 }
3100               }
3101               if (!Character.isDigit(currentCharacter))
3102                 throw new InvalidInputException(INVALID_FLOAT);
3103               while (getNextCharAsDigit()) {
3104               };
3105             }
3106             //            if (getNextChar('f', 'F') >= 0)
3107             //              return TokenNameFloatingPointLiteral;
3108             getNextChar('d', 'D'); //jump over potential d or D
3109             return TokenNameDoubleLiteral;
3110           } else {
3111             return TokenNameIntegerLiteral;
3112           }
3113         }
3114       } else {
3115         /* carry on */
3116       }
3117     }
3118
3119     while (getNextCharAsDigit()) {
3120     };
3121
3122     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3123     //      return TokenNameLongLiteral;
3124
3125     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3126       while (getNextCharAsDigit()) {
3127       };
3128       floating = true;
3129     }
3130
3131     //if floating is true both exponant and suffix may be optional
3132
3133     if (getNextChar('e', 'E') >= 0) {
3134       floating = true;
3135       // consume next character
3136       unicodeAsBackSlash = false;
3137       if (((currentCharacter = source[currentPosition++]) == '\\')
3138         && (source[currentPosition] == 'u')) {
3139         getNextUnicodeChar();
3140       } else {
3141         if (withoutUnicodePtr != 0) {
3142           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3143         }
3144       }
3145
3146       if ((currentCharacter == '-')
3147         || (currentCharacter == '+')) { // consume next character
3148         unicodeAsBackSlash = false;
3149         if (((currentCharacter = source[currentPosition++]) == '\\')
3150           && (source[currentPosition] == 'u')) {
3151           getNextUnicodeChar();
3152         } else {
3153           if (withoutUnicodePtr != 0) {
3154             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3155           }
3156         }
3157       }
3158       if (!Character.isDigit(currentCharacter))
3159         throw new InvalidInputException(INVALID_FLOAT);
3160       while (getNextCharAsDigit()) {
3161       };
3162     }
3163
3164     if (getNextChar('d', 'D') >= 0)
3165       return TokenNameDoubleLiteral;
3166     //    if (getNextChar('f', 'F') >= 0)
3167     //      return TokenNameFloatingPointLiteral;
3168
3169     //the long flag has been tested before
3170
3171     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3172   }
3173   /**
3174    * Search the line number corresponding to a specific position
3175    *
3176    */
3177   public final int getLineNumber(int position) {
3178
3179     if (lineEnds == null)
3180       return 1;
3181     int length = linePtr + 1;
3182     if (length == 0)
3183       return 1;
3184     int g = 0, d = length - 1;
3185     int m = 0;
3186     while (g <= d) {
3187       m = (g + d) / 2;
3188       if (position < lineEnds[m]) {
3189         d = m - 1;
3190       } else if (position > lineEnds[m]) {
3191         g = m + 1;
3192       } else {
3193         return m + 1;
3194       }
3195     }
3196     if (position < lineEnds[m]) {
3197       return m + 1;
3198     }
3199     return m + 2;
3200   }
3201
3202   public void setPHPMode(boolean mode) {
3203     phpMode = mode;
3204   }
3205
3206   public final void setSource(char[] source) {
3207     //the source-buffer is set to sourceString
3208
3209     if (source == null) {
3210       this.source = new char[0];
3211     } else {
3212       this.source = source;
3213     }
3214     startPosition = -1;
3215     initialPosition = currentPosition = 0;
3216     containsAssertKeyword = false;
3217     withoutUnicodeBuffer = new char[this.source.length];
3218
3219   }
3220
3221   public String toString() {
3222     if (startPosition == source.length)
3223       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3224     if (currentPosition > source.length)
3225       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3226
3227     char front[] = new char[startPosition];
3228     System.arraycopy(source, 0, front, 0, startPosition);
3229
3230     int middleLength = (currentPosition - 1) - startPosition + 1;
3231     char middle[];
3232     if (middleLength > -1) {
3233       middle = new char[middleLength];
3234       System.arraycopy(source, startPosition, middle, 0, middleLength);
3235     } else {
3236       middle = new char[0];
3237     }
3238
3239     char end[] = new char[source.length - (currentPosition - 1)];
3240     System.arraycopy(
3241       source,
3242       (currentPosition - 1) + 1,
3243       end,
3244       0,
3245       source.length - (currentPosition - 1) - 1);
3246
3247     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3248     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3249     + new String(end);
3250   }
3251   public final String toStringAction(int act) {
3252     switch (act) {
3253       case TokenNameERROR :
3254         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3255       case TokenNameStopPHP :
3256         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3257       case TokenNameIdentifier :
3258         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3259       case TokenNameVariable :
3260         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3261       case TokenNameas :
3262         return "as"; //$NON-NLS-1$
3263       case TokenNamebreak :
3264         return "break"; //$NON-NLS-1$
3265       case TokenNamecase :
3266         return "case"; //$NON-NLS-1$
3267       case TokenNameclass :
3268         return "class"; //$NON-NLS-1$
3269       case TokenNamecontinue :
3270         return "continue"; //$NON-NLS-1$
3271       case TokenNamedefault :
3272         return "default"; //$NON-NLS-1$
3273       case TokenNamedefine :
3274         return "define"; //$NON-NLS-1$
3275       case TokenNamedo :
3276         return "do"; //$NON-NLS-1$
3277       case TokenNameecho :
3278         return "echo"; //$NON-NLS-1$
3279       case TokenNameelse :
3280         return "else"; //$NON-NLS-1$
3281       case TokenNameelseif :
3282         return "elseif"; //$NON-NLS-1$
3283       case TokenNameendfor :
3284         return "endfor"; //$NON-NLS-1$
3285       case TokenNameendforeach :
3286         return "endforeach"; //$NON-NLS-1$
3287       case TokenNameendif :
3288         return "endif"; //$NON-NLS-1$
3289       case TokenNameendswitch :
3290         return "endswitch"; //$NON-NLS-1$
3291       case TokenNameendwhile :
3292         return "endwhile"; //$NON-NLS-1$
3293       case TokenNameextends :
3294         return "extends"; //$NON-NLS-1$
3295       case TokenNamefalse :
3296         return "false"; //$NON-NLS-1$
3297       case TokenNamefor :
3298         return "for"; //$NON-NLS-1$
3299       case TokenNameforeach :
3300         return "foreach"; //$NON-NLS-1$
3301       case TokenNamefunction :
3302         return "function"; //$NON-NLS-1$
3303       case TokenNameglobal :
3304         return "global"; //$NON-NLS-1$
3305       case TokenNameif :
3306         return "if"; //$NON-NLS-1$
3307       case TokenNameinclude :
3308         return "include"; //$NON-NLS-1$
3309       case TokenNameinclude_once :
3310         return "include_once"; //$NON-NLS-1$
3311       case TokenNamelist :
3312         return "list"; //$NON-NLS-1$
3313       case TokenNamenew :
3314         return "new"; //$NON-NLS-1$
3315       case TokenNamenull :
3316         return "null"; //$NON-NLS-1$
3317       case TokenNameprint :
3318         return "print"; //$NON-NLS-1$
3319       case TokenNamerequire :
3320         return "require"; //$NON-NLS-1$
3321       case TokenNamerequire_once :
3322         return "require_once"; //$NON-NLS-1$
3323       case TokenNamereturn :
3324         return "return"; //$NON-NLS-1$
3325       case TokenNamestatic :
3326         return "static"; //$NON-NLS-1$
3327       case TokenNameswitch :
3328         return "switch"; //$NON-NLS-1$
3329       case TokenNametrue :
3330         return "true"; //$NON-NLS-1$
3331       case TokenNamevar :
3332         return "var"; //$NON-NLS-1$
3333       case TokenNamewhile :
3334         return "while"; //$NON-NLS-1$
3335       case TokenNameIntegerLiteral :
3336         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3337       case TokenNameDoubleLiteral :
3338         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3339       case TokenNameStringLiteral :
3340         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3341       case TokenNameStringConstant :
3342         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3343       case TokenNameStringInterpolated :
3344         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3345       case TokenNameHEREDOC :
3346         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3347
3348       case TokenNamePLUS_PLUS :
3349         return "++"; //$NON-NLS-1$
3350       case TokenNameMINUS_MINUS :
3351         return "--"; //$NON-NLS-1$
3352       case TokenNameEQUAL_EQUAL :
3353         return "=="; //$NON-NLS-1$
3354       case TokenNameEQUAL_GREATER :
3355         return "=>"; //$NON-NLS-1$
3356       case TokenNameLESS_EQUAL :
3357         return "<="; //$NON-NLS-1$
3358       case TokenNameGREATER_EQUAL :
3359         return ">="; //$NON-NLS-1$
3360       case TokenNameNOT_EQUAL :
3361         return "!="; //$NON-NLS-1$
3362       case TokenNameLEFT_SHIFT :
3363         return "<<"; //$NON-NLS-1$
3364       case TokenNameRIGHT_SHIFT :
3365         return ">>"; //$NON-NLS-1$
3366       case TokenNamePLUS_EQUAL :
3367         return "+="; //$NON-NLS-1$
3368       case TokenNameMINUS_EQUAL :
3369         return "-="; //$NON-NLS-1$
3370       case TokenNameMULTIPLY_EQUAL :
3371         return "*="; //$NON-NLS-1$
3372       case TokenNameDIVIDE_EQUAL :
3373         return "/="; //$NON-NLS-1$
3374       case TokenNameAND_EQUAL :
3375         return "&="; //$NON-NLS-1$
3376       case TokenNameOR_EQUAL :
3377         return "|="; //$NON-NLS-1$
3378       case TokenNameXOR_EQUAL :
3379         return "^="; //$NON-NLS-1$
3380       case TokenNameREMAINDER_EQUAL :
3381         return "%="; //$NON-NLS-1$
3382       case TokenNameLEFT_SHIFT_EQUAL :
3383         return "<<="; //$NON-NLS-1$
3384       case TokenNameRIGHT_SHIFT_EQUAL :
3385         return ">>="; //$NON-NLS-1$
3386       case TokenNameOR_OR :
3387         return "||"; //$NON-NLS-1$
3388       case TokenNameAND_AND :
3389         return "&&"; //$NON-NLS-1$
3390       case TokenNamePLUS :
3391         return "+"; //$NON-NLS-1$
3392       case TokenNameMINUS :
3393         return "-"; //$NON-NLS-1$
3394       case TokenNameMINUS_GREATER :
3395         return "->";
3396       case TokenNameNOT :
3397         return "!"; //$NON-NLS-1$
3398       case TokenNameREMAINDER :
3399         return "%"; //$NON-NLS-1$
3400       case TokenNameXOR :
3401         return "^"; //$NON-NLS-1$
3402       case TokenNameAND :
3403         return "&"; //$NON-NLS-1$
3404       case TokenNameMULTIPLY :
3405         return "*"; //$NON-NLS-1$
3406       case TokenNameOR :
3407         return "|"; //$NON-NLS-1$
3408       case TokenNameTWIDDLE :
3409         return "~"; //$NON-NLS-1$
3410       case TokenNameTWIDDLE_EQUAL :
3411         return "~="; //$NON-NLS-1$
3412       case TokenNameDIVIDE :
3413         return "/"; //$NON-NLS-1$
3414       case TokenNameGREATER :
3415         return ">"; //$NON-NLS-1$
3416       case TokenNameLESS :
3417         return "<"; //$NON-NLS-1$
3418       case TokenNameLPAREN :
3419         return "("; //$NON-NLS-1$
3420       case TokenNameRPAREN :
3421         return ")"; //$NON-NLS-1$
3422       case TokenNameLBRACE :
3423         return "{"; //$NON-NLS-1$
3424       case TokenNameRBRACE :
3425         return "}"; //$NON-NLS-1$
3426       case TokenNameLBRACKET :
3427         return "["; //$NON-NLS-1$
3428       case TokenNameRBRACKET :
3429         return "]"; //$NON-NLS-1$
3430       case TokenNameSEMICOLON :
3431         return ";"; //$NON-NLS-1$
3432       case TokenNameQUESTION :
3433         return "?"; //$NON-NLS-1$
3434       case TokenNameCOLON :
3435         return ":"; //$NON-NLS-1$
3436       case TokenNameCOMMA :
3437         return ","; //$NON-NLS-1$
3438       case TokenNameDOT :
3439         return "."; //$NON-NLS-1$
3440       case TokenNameEQUAL :
3441         return "="; //$NON-NLS-1$
3442       case TokenNameAT :
3443         return "@";
3444       case TokenNameDOLLAR_LBRACE :
3445         return "${";
3446       case TokenNameEOF :
3447         return "EOF"; //$NON-NLS-1$
3448       default :
3449         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3450     }
3451   }
3452
3453   public Scanner(
3454     boolean tokenizeComments,
3455     boolean tokenizeWhiteSpace,
3456     boolean checkNonExternalizedStringLiterals) {
3457     this(
3458       tokenizeComments,
3459       tokenizeWhiteSpace,
3460       checkNonExternalizedStringLiterals,
3461       false);
3462   }
3463
3464   public Scanner(
3465     boolean tokenizeComments,
3466     boolean tokenizeWhiteSpace,
3467     boolean checkNonExternalizedStringLiterals,
3468     boolean assertMode) {
3469     this.eofPosition = Integer.MAX_VALUE;
3470     this.tokenizeComments = tokenizeComments;
3471     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3472     this.checkNonExternalizedStringLiterals =
3473       checkNonExternalizedStringLiterals;
3474     this.assertMode = assertMode;
3475   }
3476
3477   private void checkNonExternalizeString() throws InvalidInputException {
3478     if (currentLine == null)
3479       return;
3480     parseTags(currentLine);
3481   }
3482
3483   private void parseTags(NLSLine line) throws InvalidInputException {
3484     String s = new String(getCurrentTokenSource());
3485     int pos = s.indexOf(TAG_PREFIX);
3486     int lineLength = line.size();
3487     while (pos != -1) {
3488       int start = pos + TAG_PREFIX_LENGTH;
3489       int end = s.indexOf(TAG_POSTFIX, start);
3490       String index = s.substring(start, end);
3491       int i = 0;
3492       try {
3493         i = Integer.parseInt(index) - 1;
3494         // Tags are one based not zero based.
3495       } catch (NumberFormatException e) {
3496         i = -1; // we don't want to consider this as a valid NLS tag
3497       }
3498       if (line.exists(i)) {
3499         line.set(i, null);
3500       }
3501       pos = s.indexOf(TAG_PREFIX, start);
3502     }
3503
3504     this.nonNLSStrings = new StringLiteral[lineLength];
3505     int nonNLSCounter = 0;
3506     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3507       StringLiteral literal = (StringLiteral) iterator.next();
3508       if (literal != null) {
3509         this.nonNLSStrings[nonNLSCounter++] = literal;
3510       }
3511     }
3512     if (nonNLSCounter == 0) {
3513       this.nonNLSStrings = null;
3514       currentLine = null;
3515       return;
3516     }
3517     this.wasNonExternalizedStringLiteral = true;
3518     if (nonNLSCounter != lineLength) {
3519       System.arraycopy(
3520         this.nonNLSStrings,
3521         0,
3522         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3523         0,
3524         nonNLSCounter);
3525     }
3526     currentLine = null;
3527   }
3528 }