net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.IScanner;
  18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  23
  24   /* APIs are:
  25    - getNextToken() which returns the type of the current token
  26      (this value is not memorized by the scanner)
  27    - getCurrentTokenSource() which provides the token "REAL" source
  28      (i.e. all unicode escapes have been transformed into the corresponding char)
  29    - startPosition gives the start position of the token in the stream
  30    - currentPosition-1 gives the end position of the token in the stream
  31   */
  32
  33   // 1.4 feature (assert keyword support)
  34   private boolean assertMode;
  35   public boolean useAssertAsAnIndentifier = false;
  36   //flag indicating if processed source contains occurrences of keyword assert
  37   public boolean containsAssertKeyword = false;
  38
  39   public boolean recordLineSeparator; //when true, getNextToken() calls pushLineSeparator() on each cr/lf it skips
  40   public char currentCharacter; //the character most recently read from source
  41   public int startPosition; //index in source of the first character of the current token
  42   public int currentPosition; //index in source of the next character to read
  43   public int initialPosition, eofPosition;
  44   // once currentPosition is past eofPosition, EOF is generated instead of real tokens from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace; //when true, getNextToken() answers TokenNameWHITESPACE for skipped white space
  48
  49   //source should be viewed as a window (aka a part)
  50   //of an entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer; //decoded characters of the current token, stored from index 1 onwards
  55   public int withoutUnicodePtr; //when == 0 ==> no unicode in the current token
  56   public boolean unicodeAsBackSlash = false; //true when the last consumed unicode escape decoded to a backslash
  57
  58   public boolean scanningFloatLiteral = false;
  59
  60   //support for /** comments
  61   //public char[][] comments = new char[10][];
  62   public int[] commentStops = new int[10];
  63   public int[] commentStarts = new int[10];
  64   public int commentPtr = -1; // no comment test with commentPtr value -1
  65
  66   //diet parsing support - jump over some method body when requested
  67   //(getNextToken() calls jumpOverMethodBody() and then clears the flag)
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false; //reset to false at the start of every getNextToken() call
  74   // Problem identifiers; used as the message of the InvalidInputException thrown by the scanner
  75   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  76
  77   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  78   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  80   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  81   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  82   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  83   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  84
  85   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  86   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  88   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  89
  90   //----------------optimized identifier management------------------
       // shared one-character arrays, one per lowercase letter
       // (presumably answered by optimizedCurrentTokenSource1() - TODO confirm)
  91   static final char[] charArray_a = new char[] { 'a' },
  92     charArray_b = new char[] { 'b' },
  93     charArray_c = new char[] { 'c' },
  94     charArray_d = new char[] { 'd' },
  95     charArray_e = new char[] { 'e' },
  96     charArray_f = new char[] { 'f' },
  97     charArray_g = new char[] { 'g' },
  98     charArray_h = new char[] { 'h' },
  99     charArray_i = new char[] { 'i' },
 100     charArray_j = new char[] { 'j' },
 101     charArray_k = new char[] { 'k' },
 102     charArray_l = new char[] { 'l' },
 103     charArray_m = new char[] { 'm' },
 104     charArray_n = new char[] { 'n' },
 105     charArray_o = new char[] { 'o' },
 106     charArray_p = new char[] { 'p' },
 107     charArray_q = new char[] { 'q' },
 108     charArray_r = new char[] { 'r' },
 109     charArray_s = new char[] { 's' },
 110     charArray_t = new char[] { 't' },
 111     charArray_u = new char[] { 'u' },
 112     charArray_v = new char[] { 'v' },
 113     charArray_w = new char[] { 'w' },
 114     charArray_x = new char[] { 'x' },
 115     charArray_y = new char[] { 'y' },
 116     charArray_z = new char[] { 'z' };
 117   // placeholder stored in every slot of charArray_length by the instance initializer
 118   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 119   static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries
 120   public static final int OptimizedLength = 6; //size of the first dimension of charArray_length
 121   public /*static*/
 122   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 123   // support for detecting non-externalized string literals
 124   int currentLineNr = -1;
 125   int previousLineNr = -1;
 126   NLSLine currentLine = null; //set back to null by getNextToken() at a cr/lf when line separators are not recorded
 127   List lines = new ArrayList();
 128   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 129   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 130   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 131   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 132   public StringLiteral[] nonNLSStrings = null;
 133   public boolean checkNonExternalizedStringLiterals = true;
 134   public boolean wasNonExternalizedStringLiteral = false;
 135
 136   /*static*/ { // point every slot of the identifier cache at the shared placeholder
 137     for (int len = 0; len < OptimizedLength; len++) {
 138       char[][][] tables = charArray_length[len];
 139       for (int table = 0; table < TableSize; table++) {
 140         for (int slot = 0; slot < InternalTableSize; slot++)
 141           tables[table][slot] = initCharArray;
 142       }
 143     }
 144   }
 145   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0; // NOTE(review): presumably next-slot cursors for the optimizedCurrentTokenSourceN() caches - confirm
 146
 147   public static final int RoundBracket = 0;
 148   public static final int SquareBracket = 1;
 149   public static final int CurlyBracket = 2;
 150   public static final int BracketKinds = 3; // count of the bracket kind constants above
 151   public Scanner() {
 152     this(false, false); // tokenizeComments = false, tokenizeWhiteSpace = false
 153   }
 154   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 155     this(tokenizeComments, tokenizeWhiteSpace, false); // delegates to the three-argument constructor, passing false as its last argument
 156   }
 157   public final boolean atEnd() {
 158     // This code is not relevant if source is
 159     // Only a part of the real stream input
 160
 161     return source.length == currentPosition;
 162   }
 163   public char[] getCurrentIdentifierSource() {
 164     //return the token REAL source (aka unicodes are precomputed)
 165
 166     char[] result;
 167     if (withoutUnicodePtr != 0)
 168       //0 is used as a fast test flag so the real first char is in position 1
 169       System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr);
 170     else {
 171       int length = currentPosition - startPosition;
 172       switch (length) { // see OptimizedLength
 173         case 1 :
 174           return optimizedCurrentTokenSource1();
 175         case 2 :
 176           return optimizedCurrentTokenSource2();
 177         case 3 :
 178           return optimizedCurrentTokenSource3();
 179         case 4 :
 180           return optimizedCurrentTokenSource4();
 181         case 5 :
 182           return optimizedCurrentTokenSource5();
 183         case 6 :
 184           return optimizedCurrentTokenSource6();
 185       }
 186       //no optimization
 187       System.arraycopy(source, startPosition, result = new char[length], 0, length);
 188     }
 189     return result;
 190   }
 191   public int getCurrentTokenEndPosition() {
 192     return this.currentPosition - 1;
 193   }
 194   public final char[] getCurrentTokenSource() {
 195     // Return the token REAL source (aka unicodes are precomputed)
 196
 197     char[] result;
 198     if (withoutUnicodePtr != 0)
 199       // 0 is used as a fast test flag so the real first char is in position 1
 200       System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr);
 201     else {
 202       int length;
 203       System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 204     }
 205     return result;
 206   }
 207   public final char[] getCurrentTokenSourceString() {
 208     //return the token REAL source (aka unicodes are precomputed).
 209     //REMOVE the two " that are at the beginning and the end.
 210
 211     char[] result;
 212     if (withoutUnicodePtr != 0)
 213       //0 is used as a fast test flag so the real first char is in position 1
 214       System.arraycopy(withoutUnicodeBuffer, 2,
 215       //2 is 1 (real start) + 1 (to jump over the ")
 216       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 217     else {
 218       int length;
 219       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 220     }
 221     return result;
 222   }
 223   public int getCurrentTokenStartPosition() {
 224     return this.startPosition;
 225   }
 226   /*
 227    * Search the source position corresponding to the end of a given line number
 228    *
 229    * Line numbers are 1-based, and relative to the scanner initialPosition.
 230    * Character positions are 0-based.
 231    *
 232    * In case the given line number is inconsistent, answers -1.
 233    */
 234   public final int getLineEnd(int lineNumber) {
 235
 236     if (lineEnds == null)
 237       return -1;
 238     if (lineNumber >= lineEnds.length)
 239       return -1;
 240     if (lineNumber <= 0)
 241       return -1;
 242
 243     if (lineNumber == lineEnds.length - 1)
 244       return eofPosition;
 245     return lineEnds[lineNumber - 1]; // next line start one character behind the lineEnd of the previous line
 246   }
 247   /**
 248    * Search the source position corresponding to the beginning of a given line number
 249    *
 250    * Line numbers are 1-based, and relative to the scanner initialPosition.
 251    * Character positions are 0-based.
 252    *
 253    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 254    *
 255    * In case the given line number is inconsistent, answers -1.
 256    */
 257   public final int getLineStart(int lineNumber) {
 258
 259     if (lineEnds == null)
 260       return -1;
 261     if (lineNumber >= lineEnds.length)
 262       return -1;
 263     if (lineNumber <= 0)
 264       return -1;
 265
 266     if (lineNumber == 1)
 267       return initialPosition;
 268     return lineEnds[lineNumber - 2] + 1; // next line start one character behind the lineEnd of the previous line
 269   }
 270   public final boolean getNextChar(char testedChar) {
 271     //BOOLEAN
 272     //Consumes the next character if, and only if, it is equal to testedChar.
 273     //A unicode escape is decoded first; from then on the characters of the current
 274     //token are accumulated in withoutUnicodeBuffer (char internal values).
 275     //On true, currentCharacter holds the character just consumed
 276     //and currentPosition points right next after it.
 277     //On false, currentPosition is restored (currentCharacter may have been overwritten).
 278
 279     //ALL getNextChar.... ARE OPTIMIZED COPIES
 280
 281     int temp = currentPosition; //position to restore when the character is not consumed
 282     try {
 283       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 284         //-------------unicode handling ------------
 285         int c1, c2, c3, c4;
 286         int unicodeSize = 6; //backslash + 'u' + 4 hex digits; grows by one per additional 'u'
 287         currentPosition++;
 288         while (source[currentPosition] == 'u') {
 289           currentPosition++;
 290           unicodeSize++;
 291         }
 292
           //each of the four following characters must have a numeric value within 0..15
 293         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
 294           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
 295           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
 296           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
 297           currentPosition = temp;
 298           return false;
 299         }
 300
 301         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 302         if (currentCharacter != testedChar) {
 303           currentPosition = temp;
 304           return false;
 305         }
 306         unicodeAsBackSlash = currentCharacter == '\\';
 307
 308         //need the unicode buffer
 309         if (withoutUnicodePtr == 0) {
 310           //first escape of this token: buffer all the characters read before it
 311           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 312           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 313         }
 314         //fill the buffer with the char
 315         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 316         return true;
 317
 318       } //-------------end unicode handling--------------
 319       else {
 320         if (currentCharacter != testedChar) {
 321           currentPosition = temp;
 322           return false;
 323         }
 324         unicodeAsBackSlash = false;
 325         if (withoutUnicodePtr != 0)
 326           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 327         return true;
 328       }
 329     } catch (IndexOutOfBoundsException e) {
           //an array index went out of bounds while reading or buffering: nothing is consumed
 330       unicodeAsBackSlash = false;
 331       currentPosition = temp;
 332       return false;
 333     }
 334   }
 335   public final int getNextChar(char testedChar1, char testedChar2) {
 336     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 337     //test can be done with (x==0) for the first and (x>0) for the second
 338     //handle the case of unicode.
 339     //when a unicode appears then we must use a buffer that holds char internal values
 340     //At the end of this method currentCharacter holds the new visited char
 341     //and currentPosition points right next after it
 342     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 343     //On false, no side effect has occured.
 344
 345     //ALL getNextChar.... ARE OPTIMIZED COPIES
 346
 347     int temp = currentPosition;
 348     try {
 349       int result;
 350       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 351         //-------------unicode traitement ------------
 352         int c1, c2, c3, c4;
 353         int unicodeSize = 6;
 354         currentPosition++;
 355         while (source[currentPosition] == 'u') {
 356           currentPosition++;
 357           unicodeSize++;
 358         }
 359
 360         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
 361           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
 362           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
 363           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
 364           currentPosition = temp;
 365           return 2;
 366         }
 367
 368         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 369         if (currentCharacter == testedChar1)
 370           result = 0;
 371         else if (currentCharacter == testedChar2)
 372           result = 1;
 373         else {
 374           currentPosition = temp;
 375           return -1;
 376         }
 377
 378         //need the unicode buffer
 379         if (withoutUnicodePtr == 0) {
 380           //buffer all the entries that have been left aside....
 381           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 382           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 383         }
 384         //fill the buffer with the char
 385         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 386         return result;
 387       } //-------------end unicode traitement--------------
 388       else {
 389         if (currentCharacter == testedChar1)
 390           result = 0;
 391         else if (currentCharacter == testedChar2)
 392           result = 1;
 393         else {
 394           currentPosition = temp;
 395           return -1;
 396         }
 397
 398         if (withoutUnicodePtr != 0)
 399           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 400         return result;
 401       }
 402     } catch (IndexOutOfBoundsException e) {
 403       currentPosition = temp;
 404       return -1;
 405     }
 406   }
 407   public final boolean getNextCharAsDigit() {
 408     //BOOLEAN
 409     //Consumes the next character if, and only if, Character.isDigit() accepts it.
 410     //A unicode escape is decoded first; from then on the characters of the current
 411     //token are accumulated in withoutUnicodeBuffer (char internal values).
 412     //On true, currentCharacter holds the digit just consumed
 413     //and currentPosition points right next after it.
 414     //On false, currentPosition is restored (currentCharacter may have been overwritten).
 415
 416     //ALL getNextChar.... ARE OPTIMIZED COPIES
 417
 418     int temp = currentPosition; //position to restore when the character is not consumed
 419     try {
 420       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 421         //-------------unicode handling ------------
 422         int c1, c2, c3, c4;
 423         int unicodeSize = 6; //backslash + 'u' + 4 hex digits; grows by one per additional 'u'
 424         currentPosition++;
 425         while (source[currentPosition] == 'u') {
 426           currentPosition++;
 427           unicodeSize++;
 428         }
 429
           //each of the four following characters must have a numeric value within 0..15
 430         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
 431           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
 432           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
 433           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
 434           currentPosition = temp;
 435           return false;
 436         }
 437
 438         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 439         if (!Character.isDigit(currentCharacter)) {
 440           currentPosition = temp;
 441           return false;
 442         }
 443
 444         //need the unicode buffer
 445         if (withoutUnicodePtr == 0) {
 446           //first escape of this token: buffer all the characters read before it
 447           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 448           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 449         }
 450         //fill the buffer with the char
 451         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 452         return true;
 453       } //-------------end unicode handling--------------
 454       else {
 455         if (!Character.isDigit(currentCharacter)) {
 456           currentPosition = temp;
 457           return false;
 458         }
 459         if (withoutUnicodePtr != 0)
 460           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 461         return true;
 462       }
 463     } catch (IndexOutOfBoundsException e) {
           //an array index went out of bounds while reading or buffering: nothing is consumed
 464       currentPosition = temp;
 465       return false;
 466     }
 467   }
 468   public final boolean getNextCharAsDigit(int radix) {
 469     //BOOLEAN
 470     //Consumes the next character if, and only if, it is a digit in the given radix
 471     //(i.e. Character.digit(char, radix) does not answer -1).
 472     //A unicode escape is decoded first; from then on the characters of the current
 473     //token are accumulated in withoutUnicodeBuffer (char internal values).
 474     //On true, currentCharacter holds the digit just consumed and currentPosition points right next after it.
 475     //On false, currentPosition is restored (currentCharacter may have been overwritten).
 476
 477     //ALL getNextChar.... ARE OPTIMIZED COPIES
 478
 479     int temp = currentPosition; //position to restore when the character is not consumed
 480     try {
 481       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 482         //-------------unicode handling ------------
 483         int c1, c2, c3, c4;
 484         int unicodeSize = 6; //backslash + 'u' + 4 hex digits; grows by one per additional 'u'
 485         currentPosition++;
 486         while (source[currentPosition] == 'u') {
 487           currentPosition++;
 488           unicodeSize++;
 489         }
 490
           //each of the four following characters must have a numeric value within 0..15
 491         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
 492           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
 493           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
 494           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
 495           currentPosition = temp;
 496           return false;
 497         }
 498
 499         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 500         if (Character.digit(currentCharacter, radix) == -1) {
 501           currentPosition = temp;
 502           return false;
 503         }
 504
 505         //need the unicode buffer
 506         if (withoutUnicodePtr == 0) {
 507           //first escape of this token: buffer all the characters read before it
 508           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 509           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 510         }
 511         //fill the buffer with the char
 512         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 513         return true;
 514       } //-------------end unicode handling--------------
 515       else {
 516         if (Character.digit(currentCharacter, radix) == -1) {
 517           currentPosition = temp;
 518           return false;
 519         }
 520         if (withoutUnicodePtr != 0)
 521           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 522         return true;
 523       }
 524     } catch (IndexOutOfBoundsException e) {
           //an array index went out of bounds while reading or buffering: nothing is consumed
 525       currentPosition = temp;
 526       return false;
 527     }
 528   }
 529   public boolean getNextCharAsJavaIdentifierPart() {
 530     //BOOLEAN
 531     //Consumes the next character if, and only if, Character.isJavaIdentifierPart() accepts it.
 532     //A unicode escape is decoded first; from then on the characters of the current
 533     //token are accumulated in withoutUnicodeBuffer (char internal values).
 534     //On true, currentCharacter holds the character just consumed
 535     //and currentPosition points right next after it.
 536     //On false, currentPosition is restored (currentCharacter may have been overwritten).
 537
 538     //ALL getNextChar.... ARE OPTIMIZED COPIES
 539
 540     int temp = currentPosition; //position to restore when the character is not consumed
 541     try {
 542       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 543         //-------------unicode handling ------------
 544         int c1, c2, c3, c4;
 545         int unicodeSize = 6; //backslash + 'u' + 4 hex digits; grows by one per additional 'u'
 546         currentPosition++;
 547         while (source[currentPosition] == 'u') {
 548           currentPosition++;
 549           unicodeSize++;
 550         }
 551
           //each of the four following characters must have a numeric value within 0..15
 552         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
 553           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
 554           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
 555           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
 556           currentPosition = temp;
 557           return false;
 558         }
 559
 560         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 561         if (!Character.isJavaIdentifierPart(currentCharacter)) {
 562           currentPosition = temp;
 563           return false;
 564         }
 565
 566         //need the unicode buffer
 567         if (withoutUnicodePtr == 0) {
 568           //first escape of this token: buffer all the characters read before it
 569           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 570           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 571         }
 572         //fill the buffer with the char
 573         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 574         return true;
 575       } //-------------end unicode handling--------------
 576       else {
 577         if (!Character.isJavaIdentifierPart(currentCharacter)) {
 578           currentPosition = temp;
 579           return false;
 580         }
 581
 582         if (withoutUnicodePtr != 0)
 583           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 584         return true;
 585       }
 586     } catch (IndexOutOfBoundsException e) {
           //an array index went out of bounds while reading or buffering: nothing is consumed
 587       currentPosition = temp;
 588       return false;
 589     }
 590   }
 591   public int getNextToken() throws InvalidInputException {
 592
 593     this.wasAcr = false;
 594     if (diet) {
 595       jumpOverMethodBody();
 596       diet = false;
 597       return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 598     }
 599     try {
 600       while (true) { //loop for jumping over comments
 601         withoutUnicodePtr = 0;
 602         //start with a new token (even comment written with unicode )
 603
 604         // ---------Consume white space and handles startPosition---------
 605         int whiteStart = currentPosition;
 606         boolean isWhiteSpace;
 607         do {
 608           startPosition = currentPosition;
 609           if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 610             isWhiteSpace = jumpOverUnicodeWhiteSpace();
 611           } else {
 612             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 613               checkNonExternalizeString();
 614               if (recordLineSeparator) {
 615                 pushLineSeparator();
 616               } else {
 617                 currentLine = null;
 618               }
 619             }
 620             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
 621           }
 622         } while (isWhiteSpace);
 623         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 624           // reposition scanner in case we are interested by spaces as tokens
 625           currentPosition--;
 626           startPosition = whiteStart;
 627           return TokenNameWHITESPACE;
 628         }
 629         //little trick to get out in the middle of a source compuation
 630         if (currentPosition > eofPosition)
 631           return TokenNameEOF;
 632
 633         // ---------Identify the next token-------------
 634
 635         switch (currentCharacter) {
 636           case '(' :
 637             return TokenNameLPAREN;
 638           case ')' :
 639             return TokenNameRPAREN;
 640           case '{' :
 641             return TokenNameLBRACE;
 642           case '}' :
 643             return TokenNameRBRACE;
 644           case '[' :
 645             return TokenNameLBRACKET;
 646           case ']' :
 647             return TokenNameRBRACKET;
 648           case ';' :
 649             return TokenNameSEMICOLON;
 650           case ',' :
 651             return TokenNameCOMMA;
 652           case '.' :
 653             if (getNextCharAsDigit())
 654               return scanNumber(true);
 655             return TokenNameDOT;
 656           case '+' :
 657             {
 658               int test;
 659               if ((test = getNextChar('+', '=')) == 0)
 660                 return TokenNamePLUS_PLUS;
 661               if (test > 0)
 662                 return TokenNamePLUS_EQUAL;
 663               return TokenNamePLUS;
 664             }
 665           case '-' :
 666             {
 667               int test;
 668               if ((test = getNextChar('-', '=')) == 0)
 669                 return TokenNameMINUS_MINUS;
 670               if (test > 0)
 671                 return TokenNameMINUS_EQUAL;
 672               return TokenNameMINUS;
 673             }
 674           case '~' :
 675             return TokenNameTWIDDLE;
 676           case '!' :
 677             if (getNextChar('='))
 678               return TokenNameNOT_EQUAL;
 679             return TokenNameNOT;
 680           case '*' :
 681             if (getNextChar('='))
 682               return TokenNameMULTIPLY_EQUAL;
 683             return TokenNameMULTIPLY;
 684           case '%' :
 685             if (getNextChar('='))
 686               return TokenNameREMAINDER_EQUAL;
 687             return TokenNameREMAINDER;
 688           case '<' :
 689             {
 690               int test;
 691               if ((test = getNextChar('=', '<')) == 0)
 692                 return TokenNameLESS_EQUAL;
 693               if (test > 0) {
 694                 if (getNextChar('='))
 695                   return TokenNameLEFT_SHIFT_EQUAL;
 696                 return TokenNameLEFT_SHIFT;
 697               }
 698               return TokenNameLESS;
 699             }
 700           case '>' :
 701             {
 702               int test;
 703               if ((test = getNextChar('=', '>')) == 0)
 704                 return TokenNameGREATER_EQUAL;
 705               if (test > 0) {
 706                 if ((test = getNextChar('=', '>')) == 0)
 707                   return TokenNameRIGHT_SHIFT_EQUAL;
 708                 if (test > 0) {
 709                   if (getNextChar('='))
 710                     return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
 711                   return TokenNameUNSIGNED_RIGHT_SHIFT;
 712                 }
 713                 return TokenNameRIGHT_SHIFT;
 714               }
 715               return TokenNameGREATER;
 716             }
 717           case '=' :
 718             if (getNextChar('='))
 719               return TokenNameEQUAL_EQUAL;
 720             return TokenNameEQUAL;
 721           case '&' :
 722             {
 723               int test;
 724               if ((test = getNextChar('&', '=')) == 0)
 725                 return TokenNameAND_AND;
 726               if (test > 0)
 727                 return TokenNameAND_EQUAL;
 728               return TokenNameAND;
 729             }
 730           case '|' :
 731             {
 732               int test;
 733               if ((test = getNextChar('|', '=')) == 0)
 734                 return TokenNameOR_OR;
 735               if (test > 0)
 736                 return TokenNameOR_EQUAL;
 737               return TokenNameOR;
 738             }
 739           case '^' :
 740             if (getNextChar('='))
 741               return TokenNameXOR_EQUAL;
 742             return TokenNameXOR;
 743           case '?' :
 744             return TokenNameQUESTION;
 745           case ':' :
 746             return TokenNameCOLON;
 747           case '\'' :
 748             {
 749               int test;
 750               if ((test = getNextChar('\n', '\r')) == 0) {
 751                 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 752               }
 753               if (test > 0) {
 754                 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 755                 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
 756                   if (currentPosition + lookAhead == source.length)
 757                     break;
 758                   if (source[currentPosition + lookAhead] == '\n')
 759                     break;
 760                   if (source[currentPosition + lookAhead] == '\'') {
 761                     currentPosition += lookAhead + 1;
 762                     break;
 763                   }
 764                 }
 765                 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 766               }
 767             }
 768             if (getNextChar('\'')) {
 769               // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 770               for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
 771                 if (currentPosition + lookAhead == source.length)
 772                   break;
 773                 if (source[currentPosition + lookAhead] == '\n')
 774                   break;
 775                 if (source[currentPosition + lookAhead] == '\'') {
 776                   currentPosition += lookAhead + 1;
 777                   break;
 778                 }
 779               }
 780               throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 781             }
 782             if (getNextChar('\\'))
 783               scanEscapeCharacter();
 784             else { // consume next character
 785               unicodeAsBackSlash = false;
 786               if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 787                 getNextUnicodeChar();
 788               } else {
 789                 if (withoutUnicodePtr != 0) {
 790                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 791                 }
 792               }
 793             }
 794             if (getNextChar('\''))
 795               return TokenNameCharacterLiteral;
 796             // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 797             for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
 798               if (currentPosition + lookAhead == source.length)
 799                 break;
 800               if (source[currentPosition + lookAhead] == '\n')
 801                 break;
 802               if (source[currentPosition + lookAhead] == '\'') {
 803                 currentPosition += lookAhead + 1;
 804                 break;
 805               }
 806             }
 807             throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 808           case '"' :
 809             try {
 810               // consume next character
 811               unicodeAsBackSlash = false;
 812               if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 813                 getNextUnicodeChar();
 814               } else {
 815                 if (withoutUnicodePtr != 0) {
 816                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 817                 }
 818               }
 819
 820               while (currentCharacter != '"') {
 821                 /**** \r and \n are not valid in string literals ****/
 822                 if ((currentCharacter == '\n') || (currentCharacter == '\r')) {
 823                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 824                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 825                     if (currentPosition + lookAhead == source.length)
 826                       break;
 827                     if (source[currentPosition + lookAhead] == '\n')
 828                       break;
 829                     if (source[currentPosition + lookAhead] == '\"') {
 830                       currentPosition += lookAhead + 1;
 831                       break;
 832                     }
 833                   }
 834                   throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 835                 }
 836                 if (currentCharacter == '\\') {
 837                   int escapeSize = currentPosition;
 838                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 839                   //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
 840                   scanEscapeCharacter();
 841                   escapeSize = currentPosition - escapeSize;
 842                   if (withoutUnicodePtr == 0) {
 843                     //buffer all the entries that have been left aside....
 844                     withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 845                     System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 846                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 847                   } else { //overwrite the / in the buffer
 848                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 849                     if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 850                       withoutUnicodePtr--;
 851                     }
 852                   }
 853                 }
 854                 // consume next character
 855                 unicodeAsBackSlash = false;
 856                 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 857                   getNextUnicodeChar();
 858                 } else {
 859                   if (withoutUnicodePtr != 0) {
 860                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 861                   }
 862                 }
 863
 864               }
 865             } catch (IndexOutOfBoundsException e) {
 866               throw new InvalidInputException(UNTERMINATED_STRING);
 867             } catch (InvalidInputException e) {
 868               if (e.getMessage().equals(INVALID_ESCAPE)) {
 869                 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 870                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 871                   if (currentPosition + lookAhead == source.length)
 872                     break;
 873                   if (source[currentPosition + lookAhead] == '\n')
 874                     break;
 875                   if (source[currentPosition + lookAhead] == '\"') {
 876                     currentPosition += lookAhead + 1;
 877                     break;
 878                   }
 879                 }
 880
 881               }
 882               throw e; // rethrow
 883             }
 884             if (checkNonExternalizedStringLiterals) { // check for presence of  NLS tags //$NON-NLS-?$ where ? is an int.
 885               if (currentLine == null) {
 886                 currentLine = new NLSLine();
 887                 lines.add(currentLine);
 888               }
 889               currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 890             }
 891             return TokenNameStringLiteral;
 892           case '/' :
 893             {
 894               int test;
 895               if ((test = getNextChar('/', '*')) == 0) { //line comment
 896                 int endPositionForLineComment = 0;
 897                 try { //get the next char
 898                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 899                     //-------------unicode traitement ------------
 900                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 901                     currentPosition++;
 902                     while (source[currentPosition] == 'u') {
 903                       currentPosition++;
 904                     }
 905                     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 906                       || c1 < 0
 907                       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
 908                       || c2 < 0
 909                       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
 910                       || c3 < 0
 911                       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
 912                       || c4 < 0) {
 913                       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 914                     } else {
 915                       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 916                     }
 917                   }
 918
 919                   //handle the \\u case manually into comment
 920                   if (currentCharacter == '\\') {
 921                     if (source[currentPosition] == '\\')
 922                       currentPosition++;
 923                   } //jump over the \\
 924                   boolean isUnicode = false;
 925                   while (currentCharacter != '\r' && currentCharacter != '\n') {
 926                     //get the next char
 927                     isUnicode = false;
 928                     if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 929                       isUnicode = true;
 930                       //-------------unicode traitement ------------
 931                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 932                       currentPosition++;
 933                       while (source[currentPosition] == 'u') {
 934                         currentPosition++;
 935                       }
 936                       if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 937                         || c1 < 0
 938                         || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
 939                         || c2 < 0
 940                         || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
 941                         || c3 < 0
 942                         || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
 943                         || c4 < 0) {
 944                         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 945                       } else {
 946                         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 947                       }
 948                     }
 949                     //handle the \\u case manually into comment
 950                     if (currentCharacter == '\\') {
 951                       if (source[currentPosition] == '\\')
 952                         currentPosition++;
 953                     } //jump over the \\
 954                   }
 955                   if (isUnicode) {
 956                     endPositionForLineComment = currentPosition - 6;
 957                   } else {
 958                     endPositionForLineComment = currentPosition - 1;
 959                   }
 960                   recordComment(false);
 961                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 962                     checkNonExternalizeString();
 963                     if (recordLineSeparator) {
 964                       if (isUnicode) {
 965                         pushUnicodeLineSeparator();
 966                       } else {
 967                         pushLineSeparator();
 968                       }
 969                     } else {
 970                       currentLine = null;
 971                     }
 972                   }
 973                   if (tokenizeComments) {
 974                     if (!isUnicode) {
 975                       currentPosition = endPositionForLineComment; // reset one character behind
 976                     }
 977                     return TokenNameCOMMENT_LINE;
 978                   }
 979                 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
 980                   if (tokenizeComments) {
 981                     currentPosition--; // reset one character behind
 982                     return TokenNameCOMMENT_LINE;
 983                   }
 984                 }
 985                 break;
 986               }
 987               if (test > 0) { //traditional and annotation comment
 988                 boolean isJavadoc = false, star = false;
 989                 // consume next character
 990                 unicodeAsBackSlash = false;
 991                 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
 992                   getNextUnicodeChar();
 993                 } else {
 994                   if (withoutUnicodePtr != 0) {
 995                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 996                   }
 997                 }
 998
 999                 if (currentCharacter == '*') {
1000                   isJavadoc = true;
1001                   star = true;
1002                 }
1003                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1004                   checkNonExternalizeString();
1005                   if (recordLineSeparator) {
1006                     pushLineSeparator();
1007                   } else {
1008                     currentLine = null;
1009                   }
1010                 }
1011                 try { //get the next char
1012                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1013                     //-------------unicode traitement ------------
1014                     getNextUnicodeChar();
1015                   }
1016                   //handle the \\u case manually into comment
1017                   if (currentCharacter == '\\') {
1018                     if (source[currentPosition] == '\\')
1019                       currentPosition++; //jump over the \\
1020                   }
1021                   // empty comment is not a javadoc /**/
1022                   if (currentCharacter == '/') {
1023                     isJavadoc = false;
1024                   }
1025                   //loop until end of comment */
1026                   while ((currentCharacter != '/') || (!star)) {
1027                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1028                       checkNonExternalizeString();
1029                       if (recordLineSeparator) {
1030                         pushLineSeparator();
1031                       } else {
1032                         currentLine = null;
1033                       }
1034                     }
1035                     star = currentCharacter == '*';
1036                     //get next char
1037                     if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1038                       //-------------unicode traitement ------------
1039                       getNextUnicodeChar();
1040                     }
1041                     //handle the \\u case manually into comment
1042                     if (currentCharacter == '\\') {
1043                       if (source[currentPosition] == '\\')
1044                         currentPosition++;
1045                     } //jump over the \\
1046                   }
1047                   recordComment(isJavadoc);
1048                   if (tokenizeComments) {
1049                     if (isJavadoc)
1050                       return TokenNameCOMMENT_JAVADOC;
1051                     return TokenNameCOMMENT_BLOCK;
1052                   }
1053                 } catch (IndexOutOfBoundsException e) {
1054                   throw new InvalidInputException(UNTERMINATED_COMMENT);
1055                 }
1056                 break;
1057               }
1058               if (getNextChar('='))
1059                 return TokenNameDIVIDE_EQUAL;
1060               return TokenNameDIVIDE;
1061             }
1062           case '\u001a' :
1063             if (atEnd())
1064               return TokenNameEOF;
1065             //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1066             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1067
1068           default :
1069             if (Character.isJavaIdentifierStart(currentCharacter))
1070               return scanIdentifierOrKeyword();
1071             if (Character.isDigit(currentCharacter))
1072               return scanNumber(false);
1073             return TokenNameERROR;
1074         }
1075       }
1076     } //-----------------end switch while try--------------------
1077     catch (IndexOutOfBoundsException e) {
1078     }
1079     return TokenNameEOF;
1080   }
1081   public final void getNextUnicodeChar() throws IndexOutOfBoundsException, InvalidInputException {
1082     //VOID
1083     //handle the case of unicode.
1084     //when a unicode appears then we must use a buffer that holds char internal values
1085     //At the end of this method currentCharacter holds the new visited char
1086     //and currentPosition points right next after it
         //
         //Entry state (as set up by the callers visible in this file): the backslash has
         //already been consumed and currentPosition indexes the first 'u' of the escape.
         //Throws InvalidInputException(INVALID_UNICODE_ESCAPE) when one of the four digits
         //is not a hexadecimal digit. Reading past the end of source is not caught here,
         //so the resulting IndexOutOfBoundsException reaches the caller.
1087
1088     //ALL getNextChar.... ARE OPTIMIZED COPIES
1089
         //unicodeSize = length of the escape in source: backslash + 'u' + 4 digits = 6,
         //incremented below for every additional 'u'
1090     int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
         //step over the first 'u' (already checked by the caller), then over any repeated 'u'
1091     currentPosition++;
1092     while (source[currentPosition] == 'u') {
1093       currentPosition++;
1094       unicodeSize++;
1095     }
1096
         //read the four digits left to right; the || chain stops at the first invalid one,
         //so currentPosition ends just after the offending character in the error case.
         //Character.getNumericValue yields 10..35 for letters, hence the > 15 test rejects g..z
1097     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1098       || c1 < 0
1099       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1100       || c2 < 0
1101       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1102       || c3 < 0
1103       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1104       || c4 < 0) {
1105       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1106     } else {
1107       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1108       //need the unicode buffer
1109       if (withoutUnicodePtr == 0) {
1110         //buffer all the entries that have been left aside....
             //(currentPosition - unicodeSize) is the index of the backslash, so this copies
             //the token characters that precede the escape into slots 1..withoutUnicodePtr
1111         withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1112         System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1113       }
1114       //fill the buffer with the char
1115       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1116     }
         //remember whether the decoded char is itself a backslash; not reached when the
         //escape was invalid because the throw above leaves the method first
1117     unicodeAsBackSlash = currentCharacter == '\\';
1118   }
1119   /* Tokenize a method body, assuming that curly brackets are properly balanced.
        *
        * The scan starts with a brace depth of 1 (presumably the opening '{' of the body
        * has already been consumed by the caller - TODO confirm) and returns right after
        * the '}' that brings the depth back to 0. Braces inside character literals,
        * string literals and comments are skipped and therefore not counted.
        * This is a best-effort skip: InvalidInputException is swallowed everywhere, and
        * running off the end of source simply ends the method.
1120    */
1121   public final void jumpOverMethodBody() {
1122
         //NOTE(review): wasAcr is only reset here; its meaning is not visible in this part of the file
1123     this.wasAcr = false;
         //found = current nesting depth of curly brackets
1124     int found = 1;
1125     try {
1126       while (true) { //loop for jumping over comments
1127         // ---------Consume white space and handles startPosition---------
1128         boolean isWhiteSpace;
1129         do {
1130           startPosition = currentPosition;
1131           if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1132             isWhiteSpace = jumpOverUnicodeWhiteSpace();
1133           } else {
1134             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1135               pushLineSeparator();
1136             isWhiteSpace = Character.isWhitespace(currentCharacter);
1137           }
1138         } while (isWhiteSpace);
1139
1140         // -------consume token until } is found---------
1141         switch (currentCharacter) {
1142           case '{' :
1143             found++;
1144             break;
1145           case '}' :
1146             found--;
1147             if (found == 0)
1148               return;
1149             break;
1150           case '\'' :
                 //character literal: skip its single (possibly escaped) character and
                 //the closing quote, ignoring any malformed content
1151             {
1152               boolean test;
1153               test = getNextChar('\\');
1154               if (test) {
1155                 try {
1156                   scanEscapeCharacter();
1157                 } catch (InvalidInputException ex) {
1158                 };
1159               } else {
1160                 try { // consume next character
1161                   unicodeAsBackSlash = false;
1162                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1163                     getNextUnicodeChar();
1164                   } else {
1165                     if (withoutUnicodePtr != 0) {
1166                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1167                     }
1168                   }
1169                 } catch (InvalidInputException ex) {
1170                 };
1171               }
                   //consume the closing quote when it is there; the result is deliberately ignored
1172               getNextChar('\'');
1173               break;
1174             }
1175           case '"' :
                 //string literal: skip up to the closing quote or the end of the line,
                 //whichever comes first
1176             try {
1177               try { // consume next character
1178                 unicodeAsBackSlash = false;
1179                 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1180                   getNextUnicodeChar();
1181                 } else {
1182                   if (withoutUnicodePtr != 0) {
1183                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1184                   }
1185                 }
1186               } catch (InvalidInputException ex) {
1187               };
1188               while (currentCharacter != '"') {
1189                 if (currentCharacter == '\r') {
                       //treat a \r\n pair as a single line end
1190                   if (source[currentPosition] == '\n')
1191                     currentPosition++;
1192                   break; // the string cannot go further than the line
1193                 }
1194                 if (currentCharacter == '\n') {
1195                   break; // the string cannot go further than the line
1196                 }
1197                 if (currentCharacter == '\\') {
1198                   try {
1199                     scanEscapeCharacter();
1200                   } catch (InvalidInputException ex) {
1201                   };
1202                 }
1203                 try { // consume next character
1204                   unicodeAsBackSlash = false;
1205                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1206                     getNextUnicodeChar();
1207                   } else {
1208                     if (withoutUnicodePtr != 0) {
1209                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1210                     }
1211                   }
1212                 } catch (InvalidInputException ex) {
1213                 };
1214               }
1215             } catch (IndexOutOfBoundsException e) {
                   //source ended inside the string: nothing more to skip
1216               return;
1217             }
1218             break;
1219           case '/' :
1220             {
1221               int test;
                   //as used here, getNextChar('/', '*') gives 0 when the next char is '/',
                   //a positive value when it is '*', and neither when it is something else
1222               if ((test = getNextChar('/', '*')) == 0) { //line comment
1223                 try {
1224                   //get the next char
1225                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1226                     //-------------unicode handling ------------
                         //decoded inline (not through getNextUnicodeChar), so withoutUnicodeBuffer
                         //is left untouched and a malformed escape does not raise an exception
1227                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1228                     currentPosition++;
1229                     while (source[currentPosition] == 'u') {
1230                       currentPosition++;
1231                     }
1232                     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1233                       || c1 < 0
1234                       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1235                       || c2 < 0
1236                       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1237                       || c3 < 0
1238                       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1239                       || c4 < 0) { //error don't care of the value
1240                       currentCharacter = 'A';
1241                     } //something different from \n and \r
1242                     else {
1243                       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1244                     }
1245                   }
1246
                       //skip everything up to and including the first \r or \n
1247                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1248                     //get the next char
1249                     if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1250                       //-------------unicode handling ------------
1251                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1252                       currentPosition++;
1253                       while (source[currentPosition] == 'u') {
1254                         currentPosition++;
1255                       }
1256                       if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1257                         || c1 < 0
1258                         || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1259                         || c2 < 0
1260                         || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1261                         || c3 < 0
1262                         || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1263                         || c4 < 0) { //error don't care of the value
1264                         currentCharacter = 'A';
1265                       } //something different from \n and \r
1266                       else {
1267                         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1268                       }
1269                     }
1270                   }
1271                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1272                     pushLineSeparator();
1273                 } catch (IndexOutOfBoundsException e) {
1274                 } //an eof will then be generated
1275                 break;
1276               }
1277               if (test > 0) { //traditional and annotation comment
                     //star is true when the previously read character was '*'
1278                 boolean star = false;
1279                 try { // consume next character
1280                   unicodeAsBackSlash = false;
1281                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1282                     getNextUnicodeChar();
1283                   } else {
1284                     if (withoutUnicodePtr != 0) {
1285                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1286                     }
1287                   };
1288                 } catch (InvalidInputException ex) {
1289                 };
1290                 if (currentCharacter == '*') {
1291                   star = true;
1292                 }
1293                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1294                   pushLineSeparator();
1295                 try { //get the next char
1296                   if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1297                     //-------------unicode handling ------------
1298                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1299                     currentPosition++;
1300                     while (source[currentPosition] == 'u') {
1301                       currentPosition++;
1302                     }
1303                     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1304                       || c1 < 0
1305                       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1306                       || c2 < 0
1307                       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1308                       || c3 < 0
1309                       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1310                       || c4 < 0) { //error don't care of the value
1311                       currentCharacter = 'A';
1312                     } //something different from * and /
1313                     else {
1314                       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1315                     }
1316                   }
1317                   //loop until end of comment */
                       //i.e. until a '/' is read immediately after a '*'
1318                   while ((currentCharacter != '/') || (!star)) {
1319                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1320                       pushLineSeparator();
1321                     star = currentCharacter == '*';
1322                     //get next char
1323                     if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1324                       //-------------unicode handling ------------
1325                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1326                       currentPosition++;
1327                       while (source[currentPosition] == 'u') {
1328                         currentPosition++;
1329                       }
1330                       if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1331                         || c1 < 0
1332                         || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1333                         || c2 < 0
1334                         || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1335                         || c3 < 0
1336                         || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1337                         || c4 < 0) { //error don't care of the value
1338                         currentCharacter = 'A';
1339                       } //something different from * and /
1340                       else {
1341                         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1342                       }
1343                     }
1344                   }
1345                 } catch (IndexOutOfBoundsException e) {
                       //unterminated comment: source ended before the closing */
1346                   return;
1347                 }
1348                 break;
1349               }
                   //a '/' that starts no comment: nothing to skip, carry on with the next character
1350               break;
1351             }
1352
1353           default :
                 //identifiers and numbers are scanned as whole tokens and their errors ignored;
                 //any other character falls out of the switch without further action
1354             if (Character.isJavaIdentifierStart(currentCharacter)) {
1355               try {
1356                 scanIdentifierOrKeyword();
1357               } catch (InvalidInputException ex) {
1358               };
1359               break;
1360             }
1361             if (Character.isDigit(currentCharacter)) {
1362               try {
1363                 scanNumber(false);
1364               } catch (InvalidInputException ex) {
1365               };
1366               break;
1367             }
1368         }
1369       }
1370       //-----------------end switch while try--------------------
         //both exceptions end the skip silently: end of source reached, or an invalid
         //unicode escape met while skipping white space
1371     } catch (IndexOutOfBoundsException e) {
1372     } catch (InvalidInputException e) {
1373     }
1374     return;
1375   }
1376   public final boolean jumpOverUnicodeWhiteSpace() throws InvalidInputException {
1377     //BOOLEAN
1378     //handle the case of unicode. Jump over the next whiteSpace
1379     //making startPosition pointing on the next available char
1380     //On false, the currentCharacter is filled up with a potential
1381     //correct char
         //
         //Decodes the unicode escape whose first 'u' is at currentPosition and returns
         //true when the decoded character is white space, false otherwise.
         //Throws InvalidInputException(INVALID_UNICODE_ESCAPE) for a non-hex digit and
         //also when source ends inside the escape.
         //NOTE(review): startPosition is not assigned in this method; the white-space loop
         //in jumpOverMethodBody resets it before each character it reads.
1382
1383     try {
1384       this.wasAcr = false;
1385       int c1, c2, c3, c4;
           //NOTE(review): unicodeSize is counted below but never read in this method
1386       int unicodeSize = 6;
           //step over the first 'u', then over any repeated 'u'
1387       currentPosition++;
1388       while (source[currentPosition] == 'u') {
1389         currentPosition++;
1390         unicodeSize++;
1391       }
1392
           //each digit must have a numeric value in 0..15; evaluation stops at the first bad one
1393       if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
1394         || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
1395         || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
1396         || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
1397         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1398       }
1399
1400       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
           //an escaped \r or \n still counts as a line end when separators are recorded
1401       if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1402         pushLineSeparator();
1403       if (Character.isWhitespace(currentCharacter))
1404         return true;
1405
1406       //buffer the new char which is not a white space
1407       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1408       //withoutUnicodePtr == 1 is true here
1409       return false;
1410     } catch (IndexOutOfBoundsException e) {
           //source ended in the middle of the escape: report it as an invalid escape
1411       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1412     }
1413   }
1414   public final int[] getLineEnds() {
1415     //return a bounded copy of this.lineEnds
1416
1417     int[] copy;
1418     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
1419     return copy;
1420   }
1421
1422   public char[] getSource() {
1423     return this.source;
1424   }
1425   final char[] optimizedCurrentTokenSource1() {
1426     //return always the same char[] build only once
1427
1428     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1429     char charOne = source[startPosition];
1430     switch (charOne) {
1431       case 'a' :
1432         return charArray_a;
1433       case 'b' :
1434         return charArray_b;
1435       case 'c' :
1436         return charArray_c;
1437       case 'd' :
1438         return charArray_d;
1439       case 'e' :
1440         return charArray_e;
1441       case 'f' :
1442         return charArray_f;
1443       case 'g' :
1444         return charArray_g;
1445       case 'h' :
1446         return charArray_h;
1447       case 'i' :
1448         return charArray_i;
1449       case 'j' :
1450         return charArray_j;
1451       case 'k' :
1452         return charArray_k;
1453       case 'l' :
1454         return charArray_l;
1455       case 'm' :
1456         return charArray_m;
1457       case 'n' :
1458         return charArray_n;
1459       case 'o' :
1460         return charArray_o;
1461       case 'p' :
1462         return charArray_p;
1463       case 'q' :
1464         return charArray_q;
1465       case 'r' :
1466         return charArray_r;
1467       case 's' :
1468         return charArray_s;
1469       case 't' :
1470         return charArray_t;
1471       case 'u' :
1472         return charArray_u;
1473       case 'v' :
1474         return charArray_v;
1475       case 'w' :
1476         return charArray_w;
1477       case 'x' :
1478         return charArray_x;
1479       case 'y' :
1480         return charArray_y;
1481       case 'z' :
1482         return charArray_z;
1483       default :
1484         return new char[] { charOne };
1485     }
1486   }
1487   final char[] optimizedCurrentTokenSource2() {
1488     //try to return the same char[] build only once
1489
1490     char c0, c1;
1491     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
1492     char[][] table = charArray_length[0][hash];
1493     int i = newEntry2;
1494     while (++i < InternalTableSize) {
1495       char[] charArray = table[i];
1496       if ((c0 == charArray[0]) && (c1 == charArray[1]))
1497         return charArray;
1498     }
1499     //---------other side---------
1500     i = -1;
1501     int max = newEntry2;
1502     while (++i <= max) {
1503       char[] charArray = table[i];
1504       if ((c0 == charArray[0]) && (c1 == charArray[1]))
1505         return charArray;
1506     }
1507     //--------add the entry-------
1508     if (++max >= InternalTableSize)
1509       max = 0;
1510     char[] r;
1511     table[max] = (r = new char[] { c0, c1 });
1512     newEntry2 = max;
1513     return r;
1514   }
1515   final char[] optimizedCurrentTokenSource3() {
1516     //try to return the same char[] build only once
1517
1518     char c0, c1, c2;
1519     int hash =
1520       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
1521         % TableSize;
1522     char[][] table = charArray_length[1][hash];
1523     int i = newEntry3;
1524     while (++i < InternalTableSize) {
1525       char[] charArray = table[i];
1526       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
1527         return charArray;
1528     }
1529     //---------other side---------
1530     i = -1;
1531     int max = newEntry3;
1532     while (++i <= max) {
1533       char[] charArray = table[i];
1534       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
1535         return charArray;
1536     }
1537     //--------add the entry-------
1538     if (++max >= InternalTableSize)
1539       max = 0;
1540     char[] r;
1541     table[max] = (r = new char[] { c0, c1, c2 });
1542     newEntry3 = max;
1543     return r;
1544   }
1545   final char[] optimizedCurrentTokenSource4() {
1546     //try to return the same char[] build only once
1547
1548     char c0, c1, c2, c3;
1549     long hash =
1550       ((((long) (c0 = source[startPosition])) << 18)
1551         + ((c1 = source[startPosition + 1]) << 12)
1552         + ((c2 = source[startPosition + 2]) << 6)
1553         + (c3 = source[startPosition + 3]))
1554         % TableSize;
1555     char[][] table = charArray_length[2][(int) hash];
1556     int i = newEntry4;
1557     while (++i < InternalTableSize) {
1558       char[] charArray = table[i];
1559       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
1560         return charArray;
1561     }
1562     //---------other side---------
1563     i = -1;
1564     int max = newEntry4;
1565     while (++i <= max) {
1566       char[] charArray = table[i];
1567       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
1568         return charArray;
1569     }
1570     //--------add the entry-------
1571     if (++max >= InternalTableSize)
1572       max = 0;
1573     char[] r;
1574     table[max] = (r = new char[] { c0, c1, c2, c3 });
1575     newEntry4 = max;
1576     return r;
1577
1578   }
1579   final char[] optimizedCurrentTokenSource5() {
1580     //try to return the same char[] build only once
1581
1582     char c0, c1, c2, c3, c4;
1583     long hash =
1584       ((((long) (c0 = source[startPosition])) << 24)
1585         + (((long) (c1 = source[startPosition + 1])) << 18)
1586         + ((c2 = source[startPosition + 2]) << 12)
1587         + ((c3 = source[startPosition + 3]) << 6)
1588         + (c4 = source[startPosition + 4]))
1589         % TableSize;
1590     char[][] table = charArray_length[3][(int) hash];
1591     int i = newEntry5;
1592     while (++i < InternalTableSize) {
1593       char[] charArray = table[i];
1594       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
1595         return charArray;
1596     }
1597     //---------other side---------
1598     i = -1;
1599     int max = newEntry5;
1600     while (++i <= max) {
1601       char[] charArray = table[i];
1602       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
1603         return charArray;
1604     }
1605     //--------add the entry-------
1606     if (++max >= InternalTableSize)
1607       max = 0;
1608     char[] r;
1609     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
1610     newEntry5 = max;
1611     return r;
1612
1613   }
1614   final char[] optimizedCurrentTokenSource6() {
1615     //try to return the same char[] build only once
1616
1617     char c0, c1, c2, c3, c4, c5;
1618     long hash =
1619       ((((long) (c0 = source[startPosition])) << 32)
1620         + (((long) (c1 = source[startPosition + 1])) << 24)
1621         + (((long) (c2 = source[startPosition + 2])) << 18)
1622         + ((c3 = source[startPosition + 3]) << 12)
1623         + ((c4 = source[startPosition + 4]) << 6)
1624         + (c5 = source[startPosition + 5]))
1625         % TableSize;
1626     char[][] table = charArray_length[4][(int) hash];
1627     int i = newEntry6;
1628     while (++i < InternalTableSize) {
1629       char[] charArray = table[i];
1630       if ((c0 == charArray[0])
1631         && (c1 == charArray[1])
1632         && (c2 == charArray[2])
1633         && (c3 == charArray[3])
1634         && (c4 == charArray[4])
1635         && (c5 == charArray[5]))
1636         return charArray;
1637     }
1638     //---------other side---------
1639     i = -1;
1640     int max = newEntry6;
1641     while (++i <= max) {
1642       char[] charArray = table[i];
1643       if ((c0 == charArray[0])
1644         && (c1 == charArray[1])
1645         && (c2 == charArray[2])
1646         && (c3 == charArray[3])
1647         && (c4 == charArray[4])
1648         && (c5 == charArray[5]))
1649         return charArray;
1650     }
1651     //--------add the entry-------
1652     if (++max >= InternalTableSize)
1653       max = 0;
1654     char[] r;
1655     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
1656     newEntry6 = max;
1657     return r;
1658   }
1659   public final void pushLineSeparator() throws InvalidInputException {
1660     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
1661     final int INCREMENT = 250;
1662
1663     if (this.checkNonExternalizedStringLiterals) {
1664       // reinitialize the current line for non externalize strings purpose
1665       currentLine = null;
1666     }
1667     //currentCharacter is at position currentPosition-1
1668
1669     // cr 000D
1670     if (currentCharacter == '\r') {
1671       int separatorPos = currentPosition - 1;
1672       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1673         return;
1674       //System.out.println("CR-" + separatorPos);
1675       try {
1676         lineEnds[++linePtr] = separatorPos;
1677       } catch (IndexOutOfBoundsException e) {
1678         //linePtr value is correct
1679         int oldLength = lineEnds.length;
1680         int[] old = lineEnds;
1681         lineEnds = new int[oldLength + INCREMENT];
1682         System.arraycopy(old, 0, lineEnds, 0, oldLength);
1683         lineEnds[linePtr] = separatorPos;
1684       }
1685       // look-ahead for merged cr+lf
1686       try {
1687         if (source[currentPosition] == '\n') {
1688           //System.out.println("look-ahead LF-" + currentPosition);
1689           lineEnds[linePtr] = currentPosition;
1690           currentPosition++;
1691           wasAcr = false;
1692         } else {
1693           wasAcr = true;
1694         }
1695       } catch (IndexOutOfBoundsException e) {
1696         wasAcr = true;
1697       }
1698     } else {
1699       // lf 000A
1700       if (currentCharacter == '\n') { //must merge eventual cr followed by lf
1701         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
1702           //System.out.println("merge LF-" + (currentPosition - 1));
1703           lineEnds[linePtr] = currentPosition - 1;
1704         } else {
1705           int separatorPos = currentPosition - 1;
1706           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1707             return;
1708           // System.out.println("LF-" + separatorPos);
1709           try {
1710             lineEnds[++linePtr] = separatorPos;
1711           } catch (IndexOutOfBoundsException e) {
1712             //linePtr value is correct
1713             int oldLength = lineEnds.length;
1714             int[] old = lineEnds;
1715             lineEnds = new int[oldLength + INCREMENT];
1716             System.arraycopy(old, 0, lineEnds, 0, oldLength);
1717             lineEnds[linePtr] = separatorPos;
1718           }
1719         }
1720         wasAcr = false;
1721       }
1722     }
1723   }
1724   public final void pushUnicodeLineSeparator() {
1725     // isUnicode means that the \r or \n has been read as a unicode character
1726
1727     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
1728
1729     final int INCREMENT = 250;
1730     //currentCharacter is at position currentPosition-1
1731
1732     if (this.checkNonExternalizedStringLiterals) {
1733       // reinitialize the current line for non externalize strings purpose
1734       currentLine = null;
1735     }
1736
1737     // cr 000D
1738     if (currentCharacter == '\r') {
1739       int separatorPos = currentPosition - 6;
1740       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1741         return;
1742       //System.out.println("CR-" + separatorPos);
1743       try {
1744         lineEnds[++linePtr] = separatorPos;
1745       } catch (IndexOutOfBoundsException e) {
1746         //linePtr value is correct
1747         int oldLength = lineEnds.length;
1748         int[] old = lineEnds;
1749         lineEnds = new int[oldLength + INCREMENT];
1750         System.arraycopy(old, 0, lineEnds, 0, oldLength);
1751         lineEnds[linePtr] = separatorPos;
1752       }
1753       // look-ahead for merged cr+lf
1754       if (source[currentPosition] == '\n') {
1755         //System.out.println("look-ahead LF-" + currentPosition);
1756         lineEnds[linePtr] = currentPosition;
1757         currentPosition++;
1758         wasAcr = false;
1759       } else {
1760         wasAcr = true;
1761       }
1762     } else {
1763       // lf 000A
1764       if (currentCharacter == '\n') { //must merge eventual cr followed by lf
1765         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
1766           //System.out.println("merge LF-" + (currentPosition - 1));
1767           lineEnds[linePtr] = currentPosition - 6;
1768         } else {
1769           int separatorPos = currentPosition - 6;
1770           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1771             return;
1772           // System.out.println("LF-" + separatorPos);
1773           try {
1774             lineEnds[++linePtr] = separatorPos;
1775           } catch (IndexOutOfBoundsException e) {
1776             //linePtr value is correct
1777             int oldLength = lineEnds.length;
1778             int[] old = lineEnds;
1779             lineEnds = new int[oldLength + INCREMENT];
1780             System.arraycopy(old, 0, lineEnds, 0, oldLength);
1781             lineEnds[linePtr] = separatorPos;
1782           }
1783         }
1784         wasAcr = false;
1785       }
1786     }
1787   }
1788   public final void recordComment(boolean isJavadoc) {
1789
1790     // a new annotation comment is recorded
1791     try {
1792       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
1793     } catch (IndexOutOfBoundsException e) {
1794       int oldStackLength = commentStops.length;
1795       int[] oldStack = commentStops;
1796       commentStops = new int[oldStackLength + 30];
1797       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
1798       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
1799       //grows the positions buffers too
1800       int[] old = commentStarts;
1801       commentStarts = new int[oldStackLength + 30];
1802       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
1803     }
1804
1805     //the buffer is of a correct size here
1806     commentStarts[commentPtr] = startPosition;
1807   }
1808   public void resetTo(int begin, int end) {
1809     //reset the scanner to a given position where it may rescan again
1810
1811     diet = false;
1812     initialPosition = startPosition = currentPosition = begin;
1813     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
1814     commentPtr = -1; // reset comment stack
1815   }
1816
1817   public final void scanEscapeCharacter() throws InvalidInputException {
1818     // the string with "\\u" is a legal string of two chars \ and u
1819     //thus we use a direct access to the source (for regular cases).
1820
1821     if (unicodeAsBackSlash) {
1822       // consume next character
1823       unicodeAsBackSlash = false;
1824       if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1825         getNextUnicodeChar();
1826       } else {
1827         if (withoutUnicodePtr != 0) {
1828           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1829         }
1830       }
1831     } else
1832       currentCharacter = source[currentPosition++];
1833     switch (currentCharacter) {
1834       case 'b' :
1835         currentCharacter = '\b';
1836         break;
1837       case 't' :
1838         currentCharacter = '\t';
1839         break;
1840       case 'n' :
1841         currentCharacter = '\n';
1842         break;
1843       case 'f' :
1844         currentCharacter = '\f';
1845         break;
1846       case 'r' :
1847         currentCharacter = '\r';
1848         break;
1849       case '\"' :
1850         currentCharacter = '\"';
1851         break;
1852       case '\'' :
1853         currentCharacter = '\'';
1854         break;
1855       case '\\' :
1856         currentCharacter = '\\';
1857         break;
1858       default :
1859         // -----------octal escape--------------
1860         // OctalDigit
1861         // OctalDigit OctalDigit
1862         // ZeroToThree OctalDigit OctalDigit
1863
1864         int number = Character.getNumericValue(currentCharacter);
1865         if (number >= 0 && number <= 7) {
1866           boolean zeroToThreeNot = number > 3;
1867           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
1868             int digit = Character.getNumericValue(currentCharacter);
1869             if (digit >= 0 && digit <= 7) {
1870               number = (number * 8) + digit;
1871               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
1872                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
1873                   currentPosition--;
1874                 } else {
1875                   digit = Character.getNumericValue(currentCharacter);
1876                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
1877                     number = (number * 8) + digit;
1878                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
1879                     currentPosition--;
1880                   }
1881                 }
1882               } else { // has read \OctalDigit NonDigit--> ignore last character
1883                 currentPosition--;
1884               }
1885             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
1886               currentPosition--;
1887             }
1888           } else { // has read \OctalDigit --> ignore last character
1889             currentPosition--;
1890           }
1891           if (number > 255)
1892             throw new InvalidInputException(INVALID_ESCAPE);
1893           currentCharacter = (char) number;
1894         } else
1895           throw new InvalidInputException(INVALID_ESCAPE);
1896     }
1897   }
1898   public int scanIdentifierOrKeyword() throws InvalidInputException {
1899     //test keywords
1900
1901     //first dispatch on the first char.
1902     //then the length. If there are several
1903     //keywors with the same length AND the same first char, then do another
1904     //disptach on the second char :-)...cool....but fast !
1905     useAssertAsAnIndentifier = false;
1906     while (getNextCharAsJavaIdentifierPart()) {
1907     };
1908
1909     int index, length;
1910     char[] data;
1911     char firstLetter;
1912     if (withoutUnicodePtr == 0)
1913
1914       //quick test on length == 1 but not on length > 12 while most identifier
1915       //have a length which is <= 12...but there are lots of identifier with
1916       //only one char....
1917
1918       {
1919       if ((length = currentPosition - startPosition) == 1)
1920         return TokenNameIdentifier;
1921       data = source;
1922       index = startPosition;
1923     } else {
1924       if ((length = withoutUnicodePtr) == 1)
1925         return TokenNameIdentifier;
1926       data = withoutUnicodeBuffer;
1927       index = 1;
1928     }
1929
1930     firstLetter = data[index];
1931     switch (firstLetter) {
1932
1933       case 'a' : // as and array
1934         switch (length) {
1935           case 2 : //as
1936             if ((data[++index] == 's')) {
1937               return TokenNameas;
1938             } else {
1939               return TokenNameIdentifier;
1940             }
1941           case 3 : //and
1942             if ((data[++index] == 'n') && (data[++index] == 'd')) {
1943               return TokenNameas;
1944             } else {
1945               return TokenNameIdentifier;
1946             }
1947             //          case 5 :
1948             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
1949             //              return TokenNamearray;
1950             //            else
1951             //              return TokenNameIdentifier;
1952           default :
1953             return TokenNameIdentifier;
1954         }
1955       case 'b' : //break
1956         switch (length) {
1957           case 5 :
1958             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
1959               return TokenNamebreak;
1960             else
1961               return TokenNameIdentifier;
1962           default :
1963             return TokenNameIdentifier;
1964         }
1965
1966       case 'c' : //case class continue
1967         switch (length) {
1968           case 4 :
1969             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
1970               return TokenNamecase;
1971             else
1972               return TokenNameIdentifier;
1973           case 5 :
1974             if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
1975               return TokenNameclass;
1976             else
1977               return TokenNameIdentifier;
1978           case 8 :
1979             if ((data[++index] == 'o')
1980               && (data[++index] == 'n')
1981               && (data[++index] == 't')
1982               && (data[++index] == 'i')
1983               && (data[++index] == 'n')
1984               && (data[++index] == 'u')
1985               && (data[++index] == 'e'))
1986               return TokenNamecontinue;
1987             else
1988               return TokenNameIdentifier;
1989           default :
1990             return TokenNameIdentifier;
1991         }
1992
1993       case 'd' : //define default do
1994         switch (length) {
1995           case 2 :
1996             if ((data[++index] == 'o'))
1997               return TokenNamedo;
1998             else
1999               return TokenNameIdentifier;
2000           case 6 :
2001             if ((data[++index] == 'e')
2002               && (data[++index] == 'f')
2003               && (data[++index] == 'i')
2004               && (data[++index] == 'n')
2005               && (data[++index] == 'e'))
2006               return TokenNamedefine;
2007             else
2008               return TokenNameIdentifier;
2009           case 7 :
2010             if ((data[++index] == 'e')
2011               && (data[++index] == 'f')
2012               && (data[++index] == 'a')
2013               && (data[++index] == 'u')
2014               && (data[++index] == 'l')
2015               && (data[++index] == 't'))
2016               return TokenNamedefault;
2017             else
2018               return TokenNameIdentifier;
2019           default :
2020             return TokenNameIdentifier;
2021         }
2022       case 'e' : //echo else elseif extends
2023         switch (length) {
2024           case 4 :
2025             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2026               return TokenNameecho;
2027             else if ((data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2028               return TokenNameelse;
2029             else
2030               return TokenNameIdentifier;
2031           case 5 : // endif
2032             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2033               return TokenNameendif;
2034             else
2035               return TokenNameIdentifier;
2036           case 6 : // endfor
2037             if ((data[++index] == 'n')
2038               && (data[++index] == 'd')
2039               && (data[++index] == 'f')
2040               && (data[++index] == 'o')
2041               && (data[++index] == 'r'))
2042               return TokenNameendfor;
2043             else if (
2044               (data[++index] == 'l')
2045                 && (data[++index] == 's')
2046                 && (data[++index] == 'e')
2047                 && (data[++index] == 'i')
2048                 && (data[++index] == 'f'))
2049               return TokenNameelseif;
2050             else
2051               return TokenNameIdentifier;
2052           case 7 :
2053             if ((data[++index] == 'x')
2054               && (data[++index] == 't')
2055               && (data[++index] == 'e')
2056               && (data[++index] == 'n')
2057               && (data[++index] == 'd')
2058               && (data[++index] == 's'))
2059               return TokenNameextends;
2060             else
2061               return TokenNameIdentifier;
2062           case 8 : // endwhile
2063             if ((data[++index] == 'n')
2064               && (data[++index] == 'd')
2065               && (data[++index] == 'w')
2066               && (data[++index] == 'h')
2067               && (data[++index] == 'i')
2068               && (data[++index] == 'l')
2069               && (data[++index] == 'e'))
2070               return TokenNameendwhile;
2071             else
2072               return TokenNameIdentifier;
2073           case 9 : // endswitch
2074             if ((data[++index] == 'n')
2075               && (data[++index] == 'd')
2076               && (data[++index] == 's')
2077               && (data[++index] == 'w')
2078               && (data[++index] == 'i')
2079               && (data[++index] == 't')
2080               && (data[++index] == 'c')
2081               && (data[++index] == 'h'))
2082               return TokenNameendswitch;
2083             else
2084               return TokenNameIdentifier;
2085           case 10 : // endforeach
2086             if ((data[++index] == 'n')
2087               && (data[++index] == 'd')
2088               && (data[++index] == 'f')
2089               && (data[++index] == 'o')
2090               && (data[++index] == 'r')
2091               && (data[++index] == 'e')
2092               && (data[++index] == 'a')
2093               && (data[++index] == 'c')
2094               && (data[++index] == 'h'))
2095               return TokenNameendforeach;
2096             else
2097               return TokenNameIdentifier;
2098
2099           default :
2100             return TokenNameIdentifier;
2101         }
2102
2103       case 'f' : //for false function
2104         switch (length) {
2105           case 3 :
2106             if ((data[++index] == 'o') && (data[++index] == 'r'))
2107               return TokenNamefor;
2108             else
2109               return TokenNameIdentifier;
2110           case 5 :
2111             if ((data[index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2112               return TokenNamefalse;
2113             else
2114               return TokenNameIdentifier;
2115           case 8 : // function
2116             if ((data[index] == 'u')
2117               && (data[++index] == 'n')
2118               && (data[++index] == 'c')
2119               && (data[++index] == 't')
2120               && (data[++index] == 'i')
2121               && (data[++index] == 'o')
2122               && (data[++index] == 'n'))
2123               return TokenNamefunction;
2124             else
2125               return TokenNameIdentifier;
2126           default :
2127             return TokenNameIdentifier;
2128         }
2129       case 'g' : //global
2130         if (length == 6) {
2131           if ((data[++index] == 'l')
2132             && (data[++index] == 'o')
2133             && (data[++index] == 'b')
2134             && (data[++index] == 'a')
2135             && (data[++index] == 'l')) {
2136             return TokenNameglobal;
2137           }
2138         }
2139         return TokenNameIdentifier;
2140
2141       case 'i' : //if int
2142         switch (length) {
2143           case 2 :
2144             if (data[++index] == 'f')
2145               return TokenNameif;
2146             else
2147               return TokenNameIdentifier;
2148             //          case 3 :
2149             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2150             //              return TokenNameint;
2151             //            else
2152             //              return TokenNameIdentifier;
2153           case 7 :
2154             if ((data[++index] == 'n')
2155               && (data[++index] == 'c')
2156               && (data[++index] == 'l')
2157               && (data[++index] == 'u')
2158               && (data[++index] == 'd')
2159               && (data[++index] == 'e'))
2160               return TokenNameinclude;
2161             else
2162               return TokenNameIdentifier;
2163           case 12 :
2164             if ((data[++index] == 'n')
2165               && (data[++index] == 'c')
2166               && (data[++index] == 'l')
2167               && (data[++index] == 'u')
2168               && (data[++index] == 'd')
2169               && (data[++index] == 'e')
2170               && (data[++index] == '_')
2171               && (data[++index] == 'o')
2172               && (data[++index] == 'n')
2173               && (data[++index] == 'c')
2174               && (data[++index] == 'e'))
2175               return TokenNameinclude_once;
2176             else
2177               return TokenNameIdentifier;
2178           default :
2179             return TokenNameIdentifier;
2180         }
2181
2182       case 'l' : //list
2183         if (length == 4) {
2184           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2185             return TokenNamelist;
2186           }
2187         }
2188         return TokenNameIdentifier;
2189
2190       case 'n' : // new null
2191         switch (length) {
2192           case 3 :
2193             if ((data[++index] == 'e') && (data[++index] == 'w'))
2194               return TokenNamenew;
2195             else
2196               return TokenNameIdentifier;
2197           case 4 :
2198             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2199               return TokenNamenull;
2200             else
2201               return TokenNameIdentifier;
2202
2203           default :
2204             return TokenNameIdentifier;
2205         }
2206       case 'o' : // or old_function
2207         if (length == 2) {
2208           if (data[++index] == 'r') {
2209             return TokenNameor;
2210           }
2211         }
2212 //        if (length == 12) {
2213 //          if ((data[++index] == 'l')
2214 //            && (data[++index] == 'd')
2215 //            && (data[++index] == '_')
2216 //            && (data[++index] == 'f')
2217 //            && (data[++index] == 'u')
2218 //            && (data[++index] == 'n')
2219 //            && (data[++index] == 'c')
2220 //            && (data[++index] == 't')
2221 //            && (data[++index] == 'i')
2222 //            && (data[++index] == 'o')
2223 //            && (data[++index] == 'n')) {
2224 //            return TokenNameold_function;
2225 //          }
2226 //        }
2227         return TokenNameIdentifier;
2228
2229       case 'p' : // print
2230         if (length == 5) {
2231           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2232             return TokenNameprint;
2233           }
2234         }
2235         return TokenNameIdentifier;
2236       case 'r' : //return require require_once
2237         if (length == 6) {
2238           if ((data[++index] == 'e')
2239             && (data[++index] == 't')
2240             && (data[++index] == 'u')
2241             && (data[++index] == 'r')
2242             && (data[++index] == 'n')) {
2243             return TokenNamereturn;
2244           }
2245         } else if (length == 7) {
2246           if ((data[++index] == 'e')
2247             && (data[++index] == 'q')
2248             && (data[++index] == 'u')
2249             && (data[++index] == 'i')
2250             && (data[++index] == 'r')
2251             && (data[++index] == 'e')) {
2252             return TokenNamerequire;
2253           }
2254         } else if (length == 12) {
2255           if ((data[++index] == 'e')
2256             && (data[++index] == 'q')
2257             && (data[++index] == 'u')
2258             && (data[++index] == 'i')
2259             && (data[++index] == 'r')
2260             && (data[++index] == 'e')
2261             && (data[++index] == '_')
2262             && (data[++index] == 'o')
2263             && (data[++index] == 'n')
2264             && (data[++index] == 'c')
2265             && (data[++index] == 'e')) {
2266             return TokenNamerequire_once;
2267           }
2268         } else
2269           return TokenNameIdentifier;
2270
2271       case 's' : //static switch
2272         switch (length) {
2273           case 6 :
2274             if (data[++index] == 't')
2275               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2276                 return TokenNamestatic;
2277               } else
2278                 return TokenNameIdentifier;
2279             else if (
2280               (data[index] == 'w')
2281                 && (data[++index] == 'i')
2282                 && (data[++index] == 't')
2283                 && (data[++index] == 'c')
2284                 && (data[++index] == 'h'))
2285               return TokenNameswitch;
2286             else
2287               return TokenNameIdentifier;
2288           default :
2289             return TokenNameIdentifier;
2290         }
2291
2292       case 't' : // true
2293         switch (length) {
2294
2295           case 4 :
2296             if ((data[index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2297               return TokenNametrue;
2298             else
2299               return TokenNameIdentifier;
2300             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2301             //              return TokenNamethis;
2302
2303           default :
2304             return TokenNameIdentifier;
2305         }
2306
2307       case 'v' : //void volatile
2308         switch (length) {
2309           case 3 :
2310             if ((data[++index] == 'a') && (data[++index] == 'r'))
2311               return TokenNamevar;
2312             else
2313               return TokenNameIdentifier;
2314
2315           default :
2316             return TokenNameIdentifier;
2317         }
2318
2319       case 'w' : //while widefp
2320         switch (length) {
2321           case 5 :
2322             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2323               return TokenNamewhile;
2324             else
2325               return TokenNameIdentifier;
2326             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2327             //return TokenNamewidefp ;
2328             //else
2329             //return TokenNameIdentifier;
2330           default :
2331             return TokenNameIdentifier;
2332         }
2333
2334       case 'x' : //xor
2335         switch (length) {
2336           case 3 :
2337             if ((data[++index] == 'o') && (data[++index] == 'r'))
2338               return TokenNamexor;
2339             else
2340               return TokenNameIdentifier;
2341
2342           default :
2343             return TokenNameIdentifier;
2344         }
2345       default :
2346         return TokenNameIdentifier;
2347     }
2348   }
  /**
   * Scans the rest of a numeric literal and classifies it.
   *
   * On entry <code>currentCharacter</code> is the first digit of the number.
   * When <code>dotPrefix</code> is true the literal is treated as floating
   * from the start and the hexadecimal, octal, long-suffix and decimal-point
   * branches are skipped.
   *
   * NOTE(review): the meaning of the helper results is assumed from how they
   * are used here: <code>getNextChar(char, char)</code> returning a value
   * &gt;= 0 when one of the two characters was consumed,
   * <code>getNextChar(char)</code> and <code>getNextCharAsDigit(...)</code>
   * returning true when they consumed a character - confirm against their
   * definitions.
   *
   * NOTE(review): the direct reads of <code>source[currentPosition]</code>
   * below are not bounds-checked inside this method.
   *
   * @param dotPrefix true when the number is introduced by a leading '.'
   * @return TokenNameIntegerLiteral, TokenNameLongLiteral,
   *         TokenNameFloatingPointLiteral or TokenNameDoubleLiteral
   * @throws InvalidInputException with INVALID_HEXA when the character after
   *         the hexadecimal marker is not a hexadecimal digit, or with
   *         INVALID_FLOAT when an exponent marker (and optional sign) is not
   *         followed by a digit
   */
  public int scanNumber(boolean dotPrefix) throws InvalidInputException {

    //when entering this method the currentCharacter is the first
    //digit of the number, i.e. it may be preceded by a . when
    //dotPrefix is true

    boolean floating = dotPrefix;
    if ((!dotPrefix) && (currentCharacter == '0')) {
      if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
        //force the first char of the hexa number to exist...
        // consume next character
        unicodeAsBackSlash = false;
        if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
          getNextUnicodeChar();
        } else {
          // only mirror the character into the buffer when the buffer is already in use
          if (withoutUnicodePtr != 0) {
            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
          }
        }
        if (Character.digit(currentCharacter, 16) == -1)
          throw new InvalidInputException(INVALID_HEXA);
        //---end forcing--
        // swallow the remaining hexadecimal digits
        while (getNextCharAsDigit(16)) {
        };
        if (getNextChar('l', 'L') >= 0)
          return TokenNameLongLiteral;
        else
          return TokenNameIntegerLiteral;
      }

      //there is no x or X in the number
      //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
      if (getNextCharAsDigit()) { //-------------potential octal-----------------
        while (getNextCharAsDigit()) {
        };

        if (getNextChar('l', 'L') >= 0) {
          return TokenNameLongLiteral;
        }

        if (getNextChar('f', 'F') >= 0) {
          return TokenNameFloatingPointLiteral;
        }

        if (getNextChar('d', 'D') >= 0) {
          return TokenNameDoubleLiteral;
        } else { //make the distinction between octal and float ....
          if (getNextChar('.')) { //bingo ! .... a decimal point follows, so this is a floating literal
            while (getNextCharAsDigit()) {
            };
            if (getNextChar('e', 'E') >= 0) { // consume next character
              unicodeAsBackSlash = false;
              if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
                getNextUnicodeChar();
              } else {
                if (withoutUnicodePtr != 0) {
                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                }
              }

              // optional sign of the exponent
              if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
                unicodeAsBackSlash = false;
                if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
                  getNextUnicodeChar();
                } else {
                  if (withoutUnicodePtr != 0) {
                    withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
                  }
                }
              }
              // the exponent needs at least one digit
              if (!Character.isDigit(currentCharacter))
                throw new InvalidInputException(INVALID_FLOAT);
              while (getNextCharAsDigit()) {
              };
            }
            if (getNextChar('f', 'F') >= 0)
              return TokenNameFloatingPointLiteral;
            getNextChar('d', 'D'); //jump over potential d or D
            return TokenNameDoubleLiteral;
          } else {
            return TokenNameIntegerLiteral;
          }
        }
      } else {
        /* carry on */
      }
    }

    // integer part of a decimal number (or the fraction digits when dotPrefix is true)
    while (getNextCharAsDigit()) {
    };

    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
      return TokenNameLongLiteral;

    if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
      while (getNextCharAsDigit()) {
      };
      floating = true;
    }

    //if floating is true both exponent and suffix may be optional

    if (getNextChar('e', 'E') >= 0) {
      floating = true;
      // consume next character
      unicodeAsBackSlash = false;
      if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
        getNextUnicodeChar();
      } else {
        if (withoutUnicodePtr != 0) {
          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
        }
      }

      // optional sign of the exponent
      if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
        unicodeAsBackSlash = false;
        if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
          getNextUnicodeChar();
        } else {
          if (withoutUnicodePtr != 0) {
            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
          }
        }
      }
      // the exponent needs at least one digit
      if (!Character.isDigit(currentCharacter))
        throw new InvalidInputException(INVALID_FLOAT);
      while (getNextCharAsDigit()) {
      };
    }

    if (getNextChar('d', 'D') >= 0)
      return TokenNameDoubleLiteral;
    if (getNextChar('f', 'F') >= 0)
      return TokenNameFloatingPointLiteral;

    //the long flag has been tested before

    // no suffix: a decimal point, exponent or dot prefix makes it a double
    return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
  }
2488   /**
2489    * Search the line number corresponding to a specific position
2490    *
2491    */
2492   public final int getLineNumber(int position) {
2493
2494     if (lineEnds == null)
2495       return 1;
2496     int length = linePtr + 1;
2497     if (length == 0)
2498       return 1;
2499     int g = 0, d = length - 1;
2500     int m = 0;
2501     while (g <= d) {
2502       m = (g + d) / 2;
2503       if (position < lineEnds[m]) {
2504         d = m - 1;
2505       } else if (position > lineEnds[m]) {
2506         g = m + 1;
2507       } else {
2508         return m + 1;
2509       }
2510     }
2511     if (position < lineEnds[m]) {
2512       return m + 1;
2513     }
2514     return m + 2;
2515   }
2516   public final void setSource(char[] source) {
2517     //the source-buffer is set to sourceString
2518
2519     if (source == null) {
2520       this.source = new char[0];
2521     } else {
2522       this.source = source;
2523     }
2524     startPosition = -1;
2525     initialPosition = currentPosition = 0;
2526     containsAssertKeyword = false;
2527     withoutUnicodeBuffer = new char[this.source.length];
2528
2529   }
2530
2531   public String toString() {
2532     if (startPosition == source.length)
2533       return "EOF\n\n" + new String(source); //$NON-NLS-1$
2534     if (currentPosition > source.length)
2535       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
2536
2537     char front[] = new char[startPosition];
2538     System.arraycopy(source, 0, front, 0, startPosition);
2539
2540     int middleLength = (currentPosition - 1) - startPosition + 1;
2541     char middle[];
2542     if (middleLength > -1) {
2543       middle = new char[middleLength];
2544       System.arraycopy(source, startPosition, middle, 0, middleLength);
2545     } else {
2546       middle = new char[0];
2547     }
2548
2549     char end[] = new char[source.length - (currentPosition - 1)];
2550     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
2551
2552     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
2553     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
2554     + new String(end);
2555   }
2556   public final String toStringAction(int act) {
2557     switch (act) {
2558       case TokenNameIdentifier :
2559         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2560         //      case TokenNameabstract :
2561         //        return "abstract"; //$NON-NLS-1$
2562         //      case TokenNameboolean :
2563         //        return "boolean"; //$NON-NLS-1$
2564       case TokenNamebreak :
2565         return "break"; //$NON-NLS-1$
2566         //      case TokenNamebyte :
2567         //        return "byte"; //$NON-NLS-1$
2568       case TokenNamecase :
2569         return "case"; //$NON-NLS-1$
2570         //      case TokenNamecatch :
2571         //        return "catch"; //$NON-NLS-1$
2572         //      case TokenNamechar :
2573         //        return "char"; //$NON-NLS-1$
2574       case TokenNameclass :
2575         return "class"; //$NON-NLS-1$
2576       case TokenNamecontinue :
2577         return "continue"; //$NON-NLS-1$
2578       case TokenNamedefault :
2579         return "default"; //$NON-NLS-1$
2580       case TokenNamedo :
2581         return "do"; //$NON-NLS-1$
2582         //      case TokenNamedouble :
2583         //        return "double"; //$NON-NLS-1$
2584       case TokenNameelse :
2585         return "else"; //$NON-NLS-1$
2586       case TokenNameextends :
2587         return "extends"; //$NON-NLS-1$
2588       case TokenNamefalse :
2589         return "false"; //$NON-NLS-1$
2590         //      case TokenNamefinal :
2591         //        return "final"; //$NON-NLS-1$
2592         //      case TokenNamefinally :
2593         //        return "finally"; //$NON-NLS-1$
2594         //      case TokenNamefloat :
2595         //        return "float"; //$NON-NLS-1$
2596       case TokenNamefor :
2597         return "for"; //$NON-NLS-1$
2598       case TokenNameif :
2599         return "if"; //$NON-NLS-1$
2600         //      case TokenNameimplements :
2601         //        return "implements"; //$NON-NLS-1$
2602         //      case TokenNameimport :
2603         //        return "import"; //$NON-NLS-1$
2604         //      case TokenNameinstanceof :
2605         //        return "instanceof"; //$NON-NLS-1$
2606         //      case TokenNameint :
2607         //        return "int"; //$NON-NLS-1$
2608         //      case TokenNameinterface :
2609         //        return "interface"; //$NON-NLS-1$
2610         //      case TokenNamelong :
2611         //        return "long"; //$NON-NLS-1$
2612         //      case TokenNamenative :
2613         //        return "native"; //$NON-NLS-1$
2614       case TokenNamenew :
2615         return "new"; //$NON-NLS-1$
2616       case TokenNamenull :
2617         return "null"; //$NON-NLS-1$
2618         //      case TokenNamepackage :
2619         //        return "package"; //$NON-NLS-1$
2620         //      case TokenNameprivate :
2621         //        return "private"; //$NON-NLS-1$
2622         //      case TokenNameprotected :
2623         //        return "protected"; //$NON-NLS-1$
2624         //      case TokenNamepublic :
2625         //        return "public"; //$NON-NLS-1$
2626       case TokenNamereturn :
2627         return "return"; //$NON-NLS-1$
2628         //      case TokenNameshort :
2629         //        return "short"; //$NON-NLS-1$
2630       case TokenNamestatic :
2631         return "static"; //$NON-NLS-1$
2632         //      case TokenNamesuper :
2633         //        return "super"; //$NON-NLS-1$
2634       case TokenNameswitch :
2635         return "switch"; //$NON-NLS-1$
2636         //      case TokenNamesynchronized :
2637         //        return "synchronized"; //$NON-NLS-1$
2638         //      case TokenNamethis :
2639         //        return "this"; //$NON-NLS-1$
2640         //      case TokenNamethrow :
2641         //        return "throw"; //$NON-NLS-1$
2642         //      case TokenNamethrows :
2643         //        return "throws"; //$NON-NLS-1$
2644         //      case TokenNametransient :
2645         //        return "transient"; //$NON-NLS-1$
2646       case TokenNametrue :
2647         return "true"; //$NON-NLS-1$
2648         //      case TokenNametry :
2649         //        return "try"; //$NON-NLS-1$
2650         //      case TokenNamevoid :
2651         //        return "void"; //$NON-NLS-1$
2652         //      case TokenNamevolatile :
2653         //        return "volatile"; //$NON-NLS-1$
2654       case TokenNamewhile :
2655         return "while"; //$NON-NLS-1$
2656
2657       case TokenNameIntegerLiteral :
2658         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2659       case TokenNameLongLiteral :
2660         return "Long(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2661       case TokenNameFloatingPointLiteral :
2662         return "Float(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2663       case TokenNameDoubleLiteral :
2664         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2665       case TokenNameCharacterLiteral :
2666         return "Char(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2667       case TokenNameStringLiteral :
2668         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2669
2670       case TokenNamePLUS_PLUS :
2671         return "++"; //$NON-NLS-1$
2672       case TokenNameMINUS_MINUS :
2673         return "--"; //$NON-NLS-1$
2674       case TokenNameEQUAL_EQUAL :
2675         return "=="; //$NON-NLS-1$
2676       case TokenNameLESS_EQUAL :
2677         return "<="; //$NON-NLS-1$
2678       case TokenNameGREATER_EQUAL :
2679         return ">="; //$NON-NLS-1$
2680       case TokenNameNOT_EQUAL :
2681         return "!="; //$NON-NLS-1$
2682       case TokenNameLEFT_SHIFT :
2683         return "<<"; //$NON-NLS-1$
2684       case TokenNameRIGHT_SHIFT :
2685         return ">>"; //$NON-NLS-1$
2686       case TokenNameUNSIGNED_RIGHT_SHIFT :
2687         return ">>>"; //$NON-NLS-1$
2688       case TokenNamePLUS_EQUAL :
2689         return "+="; //$NON-NLS-1$
2690       case TokenNameMINUS_EQUAL :
2691         return "-="; //$NON-NLS-1$
2692       case TokenNameMULTIPLY_EQUAL :
2693         return "*="; //$NON-NLS-1$
2694       case TokenNameDIVIDE_EQUAL :
2695         return "/="; //$NON-NLS-1$
2696       case TokenNameAND_EQUAL :
2697         return "&="; //$NON-NLS-1$
2698       case TokenNameOR_EQUAL :
2699         return "|="; //$NON-NLS-1$
2700       case TokenNameXOR_EQUAL :
2701         return "^="; //$NON-NLS-1$
2702       case TokenNameREMAINDER_EQUAL :
2703         return "%="; //$NON-NLS-1$
2704       case TokenNameLEFT_SHIFT_EQUAL :
2705         return "<<="; //$NON-NLS-1$
2706       case TokenNameRIGHT_SHIFT_EQUAL :
2707         return ">>="; //$NON-NLS-1$
2708       case TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL :
2709         return ">>>="; //$NON-NLS-1$
2710       case TokenNameOR_OR :
2711         return "||"; //$NON-NLS-1$
2712       case TokenNameAND_AND :
2713         return "&&"; //$NON-NLS-1$
2714       case TokenNamePLUS :
2715         return "+"; //$NON-NLS-1$
2716       case TokenNameMINUS :
2717         return "-"; //$NON-NLS-1$
2718       case TokenNameNOT :
2719         return "!"; //$NON-NLS-1$
2720       case TokenNameREMAINDER :
2721         return "%"; //$NON-NLS-1$
2722       case TokenNameXOR :
2723         return "^"; //$NON-NLS-1$
2724       case TokenNameAND :
2725         return "&"; //$NON-NLS-1$
2726       case TokenNameMULTIPLY :
2727         return "*"; //$NON-NLS-1$
2728       case TokenNameOR :
2729         return "|"; //$NON-NLS-1$
2730       case TokenNameTWIDDLE :
2731         return "~"; //$NON-NLS-1$
2732       case TokenNameDIVIDE :
2733         return "/"; //$NON-NLS-1$
2734       case TokenNameGREATER :
2735         return ">"; //$NON-NLS-1$
2736       case TokenNameLESS :
2737         return "<"; //$NON-NLS-1$
2738       case TokenNameLPAREN :
2739         return "("; //$NON-NLS-1$
2740       case TokenNameRPAREN :
2741         return ")"; //$NON-NLS-1$
2742       case TokenNameLBRACE :
2743         return "{"; //$NON-NLS-1$
2744       case TokenNameRBRACE :
2745         return "}"; //$NON-NLS-1$
2746       case TokenNameLBRACKET :
2747         return "["; //$NON-NLS-1$
2748       case TokenNameRBRACKET :
2749         return "]"; //$NON-NLS-1$
2750       case TokenNameSEMICOLON :
2751         return ";"; //$NON-NLS-1$
2752       case TokenNameQUESTION :
2753         return "?"; //$NON-NLS-1$
2754       case TokenNameCOLON :
2755         return ":"; //$NON-NLS-1$
2756       case TokenNameCOMMA :
2757         return ","; //$NON-NLS-1$
2758       case TokenNameDOT :
2759         return "."; //$NON-NLS-1$
2760       case TokenNameEQUAL :
2761         return "="; //$NON-NLS-1$
2762       case TokenNameEOF :
2763         return "EOF"; //$NON-NLS-1$
2764       default :
2765         return "not-a-token"; //$NON-NLS-1$
2766     }
2767   }
2768
  /**
   * Creates a scanner with assert mode switched off: delegates to the
   * four-argument constructor, passing <code>false</code> as
   * <code>assertMode</code>.
   *
   * @param tokenizeComments forwarded unchanged
   * @param tokenizeWhiteSpace forwarded unchanged
   * @param checkNonExternalizedStringLiterals forwarded unchanged
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
    this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
  }
2772
2773   public Scanner(
2774     boolean tokenizeComments,
2775     boolean tokenizeWhiteSpace,
2776     boolean checkNonExternalizedStringLiterals,
2777     boolean assertMode) {
2778     this.eofPosition = Integer.MAX_VALUE;
2779     this.tokenizeComments = tokenizeComments;
2780     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
2781     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
2782     this.assertMode = assertMode;
2783   }
2784
2785   private void checkNonExternalizeString() throws InvalidInputException {
2786     if (currentLine == null)
2787       return;
2788     parseTags(currentLine);
2789   }
2790
2791   private void parseTags(NLSLine line) throws InvalidInputException {
2792     String s = new String(getCurrentTokenSource());
2793     int pos = s.indexOf(TAG_PREFIX);
2794     int lineLength = line.size();
2795     while (pos != -1) {
2796       int start = pos + TAG_PREFIX_LENGTH;
2797       int end = s.indexOf(TAG_POSTFIX, start);
2798       String index = s.substring(start, end);
2799       int i = 0;
2800       try {
2801         i = Integer.parseInt(index) - 1; // Tags are one based not zero based.
2802       } catch (NumberFormatException e) {
2803         i = -1; // we don't want to consider this as a valid NLS tag
2804       }
2805       if (line.exists(i)) {
2806         line.set(i, null);
2807       }
2808       pos = s.indexOf(TAG_PREFIX, start);
2809     }
2810
2811     this.nonNLSStrings = new StringLiteral[lineLength];
2812     int nonNLSCounter = 0;
2813     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
2814       StringLiteral literal = (StringLiteral) iterator.next();
2815       if (literal != null) {
2816         this.nonNLSStrings[nonNLSCounter++] = literal;
2817       }
2818     }
2819     if (nonNLSCounter == 0) {
2820       this.nonNLSStrings = null;
2821       currentLine = null;
2822       return;
2823     }
2824     this.wasNonExternalizedStringLiteral = true;
2825     if (nonNLSCounter != lineLength) {
2826       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
2827     }
2828     currentLine = null;
2829   }
2830 }