net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
  22   /* APIs are
  23    - getNextToken() which returns the current type of the token
  24      (this value is not memorized by the scanner)
  25    - getCurrentTokenSource() which provides the token "REAL" source
  26      (aka all unicode escapes have been transformed into a correct char)
  27    - sourceStart gives the position into the stream
  28    - currentPosition-1 gives the sourceEnd position into the stream
  29   */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
  50   //of a entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  91   //----------------optimized identifier managment------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray =
 120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 121   static final int TableSize = 30, InternalTableSize = 6;
 122   //30*6 = 180 entries
 123   public static final int OptimizedLength = 6;
 124   public /*static*/
 125   final char[][][][] charArray_length =
 126     new char[OptimizedLength][TableSize][InternalTableSize][];
 127   // support for detecting non-externalized string literals
 128   int currentLineNr = -1;
 129   int previousLineNr = -1;
 130   NLSLine currentLine = null;
 131   List lines = new ArrayList();
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 136   public StringLiteral[] nonNLSStrings = null;
 137   public boolean checkNonExternalizedStringLiterals = true;
 138   public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
 149   static int newEntry2 = 0,
 150     newEntry3 = 0,
 151     newEntry4 = 0,
 152     newEntry5 = 0,
 153     newEntry6 = 0;
 154
 155   public static final int RoundBracket = 0;
 156   public static final int SquareBracket = 1;
 157   public static final int CurlyBracket = 2;
 158   public static final int BracketKinds = 3;
 159
 160   public static final boolean DEBUG = false;
 161   public Scanner() {
 162     this(false, false);
 163   }
 164   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 165     this(tokenizeComments, tokenizeWhiteSpace, false);
 166   }
 167
 168   /**
 169    * Determines if the specified character is
 170    * permissible as the first character in a PHP identifier
 171    */
 172   public static boolean isPHPIdentifierStart(char ch) {
 173     return Character.isLetter(ch) || (ch == '_');
 174   }
 175
 176   /**
 177    * Determines if the specified character may be part of a PHP identifier as
 178    * other than the first character
 179    */
 180   public static boolean isPHPIdentifierPart(char ch) {
 181     return Character.isLetterOrDigit(ch) || (ch == '_');
 182   }
 183
 184   public final boolean atEnd() {
 185     // This code is not relevant if source is
 186     // Only a part of the real stream input
 187
 188     return source.length == currentPosition;
 189   }
 190   public char[] getCurrentIdentifierSource() {
 191     //return the token REAL source (aka unicodes are precomputed)
 192
 193     char[] result;
 194     if (withoutUnicodePtr != 0)
 195       //0 is used as a fast test flag so the real first char is in position 1
 196       System.arraycopy(
 197         withoutUnicodeBuffer,
 198         1,
 199         result = new char[withoutUnicodePtr],
 200         0,
 201         withoutUnicodePtr);
 202     else {
 203       int length = currentPosition - startPosition;
 204       switch (length) { // see OptimizedLength
 205         case 1 :
 206           return optimizedCurrentTokenSource1();
 207         case 2 :
 208           return optimizedCurrentTokenSource2();
 209         case 3 :
 210           return optimizedCurrentTokenSource3();
 211         case 4 :
 212           return optimizedCurrentTokenSource4();
 213         case 5 :
 214           return optimizedCurrentTokenSource5();
 215         case 6 :
 216           return optimizedCurrentTokenSource6();
 217       }
 218       //no optimization
 219       System.arraycopy(
 220         source,
 221         startPosition,
 222         result = new char[length],
 223         0,
 224         length);
 225     }
 226     return result;
 227   }
 228   public int getCurrentTokenEndPosition() {
 229     return this.currentPosition - 1;
 230   }
 231   public final char[] getCurrentTokenSource() {
 232     // Return the token REAL source (aka unicodes are precomputed)
 233
 234     char[] result;
 235     if (withoutUnicodePtr != 0)
 236       // 0 is used as a fast test flag so the real first char is in position 1
 237       System.arraycopy(
 238         withoutUnicodeBuffer,
 239         1,
 240         result = new char[withoutUnicodePtr],
 241         0,
 242         withoutUnicodePtr);
 243     else {
 244       int length;
 245       System.arraycopy(
 246         source,
 247         startPosition,
 248         result = new char[length = currentPosition - startPosition],
 249         0,
 250         length);
 251     }
 252     return result;
 253   }
 254
 255   public final char[] getCurrentTokenSource(int startPos) {
 256     // Return the token REAL source (aka unicodes are precomputed)
 257
 258     char[] result;
 259     if (withoutUnicodePtr != 0)
 260       // 0 is used as a fast test flag so the real first char is in position 1
 261       System.arraycopy(
 262         withoutUnicodeBuffer,
 263         1,
 264         result = new char[withoutUnicodePtr],
 265         0,
 266         withoutUnicodePtr);
 267     else {
 268       int length;
 269       System.arraycopy(
 270         source,
 271         startPos,
 272         result = new char[length = currentPosition - startPos],
 273         0,
 274         length);
 275     }
 276     return result;
 277   }
 278
 279   public final char[] getCurrentTokenSourceString() {
 280     //return the token REAL source (aka unicodes are precomputed).
 281     //REMOVE the two " that are at the beginning and the end.
 282
 283     char[] result;
 284     if (withoutUnicodePtr != 0)
 285       //0 is used as a fast test flag so the real first char is in position 1
 286       System.arraycopy(withoutUnicodeBuffer, 2,
 287       //2 is 1 (real start) + 1 (to jump over the ")
 288       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 289     else {
 290       int length;
 291       System.arraycopy(
 292         source,
 293         startPosition + 1,
 294         result = new char[length = currentPosition - startPosition - 2],
 295         0,
 296         length);
 297     }
 298     return result;
 299   }
 300   public int getCurrentTokenStartPosition() {
 301     return this.startPosition;
 302   }
 303   /*
 304    * Search the source position corresponding to the end of a given line number
 305    *
 306    * Line numbers are 1-based, and relative to the scanner initialPosition.
 307    * Character positions are 0-based.
 308    *
 309    * In case the given line number is inconsistent, answers -1.
 310    */
 311   public final int getLineEnd(int lineNumber) {
 312
 313     if (lineEnds == null)
 314       return -1;
 315     if (lineNumber >= lineEnds.length)
 316       return -1;
 317     if (lineNumber <= 0)
 318       return -1;
 319
 320     if (lineNumber == lineEnds.length - 1)
 321       return eofPosition;
 322     return lineEnds[lineNumber - 1];
 323     // next line start one character behind the lineEnd of the previous line
 324   }
 325   /**
 326    * Search the source position corresponding to the beginning of a given line number
 327    *
 328    * Line numbers are 1-based, and relative to the scanner initialPosition.
 329    * Character positions are 0-based.
 330    *
 331    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 332    *
 333    * In case the given line number is inconsistent, answers -1.
 334    */
 335   public final int getLineStart(int lineNumber) {
 336
 337     if (lineEnds == null)
 338       return -1;
 339     if (lineNumber >= lineEnds.length)
 340       return -1;
 341     if (lineNumber <= 0)
 342       return -1;
 343
 344     if (lineNumber == 1)
 345       return initialPosition;
 346     return lineEnds[lineNumber - 2] + 1;
 347     // next line start one character behind the lineEnd of the previous line
 348   }
 349   public final boolean getNextChar(char testedChar) {
 350     //BOOLEAN
 351     //handle the case of unicode.
 352     //when a unicode appears then we must use a buffer that holds char internal values
 353     //At the end of this method currentCharacter holds the new visited char
 354     //and currentPosition points right next after it
 355     //Both previous lines are true if the currentCharacter is == to the testedChar
 356     //On false, no side effect has occured.
 357
 358     //ALL getNextChar.... ARE OPTIMIZED COPIES
 359
 360     int temp = currentPosition;
 361     try {
 362       if (((currentCharacter = source[currentPosition++]) == '\\')
 363         && (source[currentPosition] == 'u')) {
 364         //-------------unicode traitement ------------
 365         int c1, c2, c3, c4;
 366         int unicodeSize = 6;
 367         currentPosition++;
 368         while (source[currentPosition] == 'u') {
 369           currentPosition++;
 370           unicodeSize++;
 371         }
 372
 373         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 374           || c1 < 0)
 375           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 376             || c2 < 0)
 377           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 378             || c3 < 0)
 379           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 380             || c4 < 0)) {
 381           currentPosition = temp;
 382           return false;
 383         }
 384
 385         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 386         if (currentCharacter != testedChar) {
 387           currentPosition = temp;
 388           return false;
 389         }
 390         unicodeAsBackSlash = currentCharacter == '\\';
 391
 392         //need the unicode buffer
 393         if (withoutUnicodePtr == 0) {
 394           //buffer all the entries that have been left aside....
 395           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 396           System.arraycopy(
 397             source,
 398             startPosition,
 399             withoutUnicodeBuffer,
 400             1,
 401             withoutUnicodePtr);
 402         }
 403         //fill the buffer with the char
 404         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 405         return true;
 406
 407       } //-------------end unicode traitement--------------
 408       else {
 409         if (currentCharacter != testedChar) {
 410           currentPosition = temp;
 411           return false;
 412         }
 413         unicodeAsBackSlash = false;
 414         if (withoutUnicodePtr != 0)
 415           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 416         return true;
 417       }
 418     } catch (IndexOutOfBoundsException e) {
 419       unicodeAsBackSlash = false;
 420       currentPosition = temp;
 421       return false;
 422     }
 423   }
 424   public final int getNextChar(char testedChar1, char testedChar2) {
 425     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 426     //test can be done with (x==0) for the first and (x>0) for the second
 427     //handle the case of unicode.
 428     //when a unicode appears then we must use a buffer that holds char internal values
 429     //At the end of this method currentCharacter holds the new visited char
 430     //and currentPosition points right next after it
 431     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 432     //On false, no side effect has occured.
 433
 434     //ALL getNextChar.... ARE OPTIMIZED COPIES
 435
 436     int temp = currentPosition;
 437     try {
 438       int result;
 439       if (((currentCharacter = source[currentPosition++]) == '\\')
 440         && (source[currentPosition] == 'u')) {
 441         //-------------unicode traitement ------------
 442         int c1, c2, c3, c4;
 443         int unicodeSize = 6;
 444         currentPosition++;
 445         while (source[currentPosition] == 'u') {
 446           currentPosition++;
 447           unicodeSize++;
 448         }
 449
 450         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 451           || c1 < 0)
 452           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 453             || c2 < 0)
 454           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 455             || c3 < 0)
 456           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 457             || c4 < 0)) {
 458           currentPosition = temp;
 459           return 2;
 460         }
 461
 462         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 463         if (currentCharacter == testedChar1)
 464           result = 0;
 465         else if (currentCharacter == testedChar2)
 466           result = 1;
 467         else {
 468           currentPosition = temp;
 469           return -1;
 470         }
 471
 472         //need the unicode buffer
 473         if (withoutUnicodePtr == 0) {
 474           //buffer all the entries that have been left aside....
 475           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 476           System.arraycopy(
 477             source,
 478             startPosition,
 479             withoutUnicodeBuffer,
 480             1,
 481             withoutUnicodePtr);
 482         }
 483         //fill the buffer with the char
 484         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 485         return result;
 486       } //-------------end unicode traitement--------------
 487       else {
 488         if (currentCharacter == testedChar1)
 489           result = 0;
 490         else if (currentCharacter == testedChar2)
 491           result = 1;
 492         else {
 493           currentPosition = temp;
 494           return -1;
 495         }
 496
 497         if (withoutUnicodePtr != 0)
 498           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 499         return result;
 500       }
 501     } catch (IndexOutOfBoundsException e) {
 502       currentPosition = temp;
 503       return -1;
 504     }
 505   }
 506   public final boolean getNextCharAsDigit() {
 507     //BOOLEAN
 508     //handle the case of unicode.
 509     //when a unicode appears then we must use a buffer that holds char internal values
 510     //At the end of this method currentCharacter holds the new visited char
 511     //and currentPosition points right next after it
 512     //Both previous lines are true if the currentCharacter is a digit
 513     //On false, no side effect has occured.
 514
 515     //ALL getNextChar.... ARE OPTIMIZED COPIES
 516
 517     int temp = currentPosition;
 518     try {
 519       if (((currentCharacter = source[currentPosition++]) == '\\')
 520         && (source[currentPosition] == 'u')) {
 521         //-------------unicode traitement ------------
 522         int c1, c2, c3, c4;
 523         int unicodeSize = 6;
 524         currentPosition++;
 525         while (source[currentPosition] == 'u') {
 526           currentPosition++;
 527           unicodeSize++;
 528         }
 529
 530         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 531           || c1 < 0)
 532           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 533             || c2 < 0)
 534           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 535             || c3 < 0)
 536           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 537             || c4 < 0)) {
 538           currentPosition = temp;
 539           return false;
 540         }
 541
 542         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 543         if (!Character.isDigit(currentCharacter)) {
 544           currentPosition = temp;
 545           return false;
 546         }
 547
 548         //need the unicode buffer
 549         if (withoutUnicodePtr == 0) {
 550           //buffer all the entries that have been left aside....
 551           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 552           System.arraycopy(
 553             source,
 554             startPosition,
 555             withoutUnicodeBuffer,
 556             1,
 557             withoutUnicodePtr);
 558         }
 559         //fill the buffer with the char
 560         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 561         return true;
 562       } //-------------end unicode traitement--------------
 563       else {
 564         if (!Character.isDigit(currentCharacter)) {
 565           currentPosition = temp;
 566           return false;
 567         }
 568         if (withoutUnicodePtr != 0)
 569           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 570         return true;
 571       }
 572     } catch (IndexOutOfBoundsException e) {
 573       currentPosition = temp;
 574       return false;
 575     }
 576   }
 577   public final boolean getNextCharAsDigit(int radix) {
 578     //BOOLEAN
 579     //handle the case of unicode.
 580     //when a unicode appears then we must use a buffer that holds char internal values
 581     //At the end of this method currentCharacter holds the new visited char
 582     //and currentPosition points right next after it
 583     //Both previous lines are true if the currentCharacter is a digit base on radix
 584     //On false, no side effect has occured.
 585
 586     //ALL getNextChar.... ARE OPTIMIZED COPIES
 587
 588     int temp = currentPosition;
 589     try {
 590       if (((currentCharacter = source[currentPosition++]) == '\\')
 591         && (source[currentPosition] == 'u')) {
 592         //-------------unicode traitement ------------
 593         int c1, c2, c3, c4;
 594         int unicodeSize = 6;
 595         currentPosition++;
 596         while (source[currentPosition] == 'u') {
 597           currentPosition++;
 598           unicodeSize++;
 599         }
 600
 601         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 602           || c1 < 0)
 603           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 604             || c2 < 0)
 605           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 606             || c3 < 0)
 607           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 608             || c4 < 0)) {
 609           currentPosition = temp;
 610           return false;
 611         }
 612
 613         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 614         if (Character.digit(currentCharacter, radix) == -1) {
 615           currentPosition = temp;
 616           return false;
 617         }
 618
 619         //need the unicode buffer
 620         if (withoutUnicodePtr == 0) {
 621           //buffer all the entries that have been left aside....
 622           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 623           System.arraycopy(
 624             source,
 625             startPosition,
 626             withoutUnicodeBuffer,
 627             1,
 628             withoutUnicodePtr);
 629         }
 630         //fill the buffer with the char
 631         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 632         return true;
 633       } //-------------end unicode traitement--------------
 634       else {
 635         if (Character.digit(currentCharacter, radix) == -1) {
 636           currentPosition = temp;
 637           return false;
 638         }
 639         if (withoutUnicodePtr != 0)
 640           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 641         return true;
 642       }
 643     } catch (IndexOutOfBoundsException e) {
 644       currentPosition = temp;
 645       return false;
 646     }
 647   }
 648   public boolean getNextCharAsJavaIdentifierPart() {
 649     //BOOLEAN
 650     //handle the case of unicode.
 651     //when a unicode appears then we must use a buffer that holds char internal values
 652     //At the end of this method currentCharacter holds the new visited char
 653     //and currentPosition points right next after it
 654     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 655     //On false, no side effect has occured.
 656
 657     //ALL getNextChar.... ARE OPTIMIZED COPIES
 658
 659     int temp = currentPosition;
 660     try {
 661       if (((currentCharacter = source[currentPosition++]) == '\\')
 662         && (source[currentPosition] == 'u')) {
 663         //-------------unicode traitement ------------
 664         int c1, c2, c3, c4;
 665         int unicodeSize = 6;
 666         currentPosition++;
 667         while (source[currentPosition] == 'u') {
 668           currentPosition++;
 669           unicodeSize++;
 670         }
 671
 672         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 673           || c1 < 0)
 674           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 675             || c2 < 0)
 676           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 677             || c3 < 0)
 678           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 679             || c4 < 0)) {
 680           currentPosition = temp;
 681           return false;
 682         }
 683
 684         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 685         if (!isPHPIdentifierPart(currentCharacter)) {
 686           currentPosition = temp;
 687           return false;
 688         }
 689
 690         //need the unicode buffer
 691         if (withoutUnicodePtr == 0) {
 692           //buffer all the entries that have been left aside....
 693           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 694           System.arraycopy(
 695             source,
 696             startPosition,
 697             withoutUnicodeBuffer,
 698             1,
 699             withoutUnicodePtr);
 700         }
 701         //fill the buffer with the char
 702         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 703         return true;
 704       } //-------------end unicode traitement--------------
 705       else {
 706         if (!isPHPIdentifierPart(currentCharacter)) {
 707           currentPosition = temp;
 708           return false;
 709         }
 710
 711         if (withoutUnicodePtr != 0)
 712           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 713         return true;
 714       }
 715     } catch (IndexOutOfBoundsException e) {
 716       currentPosition = temp;
 717       return false;
 718     }
 719   }
 720
 721   public int getNextToken() throws InvalidInputException {
 722     try {
 723       while (!phpMode) {
 724         startPosition = currentPosition;
 725         currentCharacter = source[currentPosition++];
 726         if (currentCharacter == '<') {
 727           if (getNextChar('?')) {
 728             currentCharacter = source[currentPosition++];
 729             if ((currentCharacter == ' ')
 730               || Character.isWhitespace(currentCharacter)) {
 731               // <?
 732               startPosition = currentPosition;
 733               phpMode = true;
 734             } else {
 735               boolean phpStart =
 736                 (currentCharacter == 'P') || (currentCharacter == 'p');
 737               if (phpStart) {
 738                 int test = getNextChar('H', 'h');
 739                 if (test >= 0) {
 740                   test = getNextChar('P', 'p');
 741                   if (test >= 0) {
 742                     // <?PHP  <?php
 743                     startPosition = currentPosition;
 744                     phpMode = true;
 745                   }
 746                 }
 747               }
 748             }
 749           }
 750         }
 751
 752         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 753           if (recordLineSeparator) {
 754             pushLineSeparator();
 755           } else {
 756             currentLine = null;
 757           }
 758         }
 759       }
 760     } //-----------------end switch while try--------------------
 761     catch (IndexOutOfBoundsException e) {
 762       return TokenNameEOF;
 763     }
 764
 765     if (phpMode) {
 766       this.wasAcr = false;
 767       if (diet) {
 768         jumpOverMethodBody();
 769         diet = false;
 770         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 771       }
 772       try {
 773         while (true) { //loop for jumping over comments
 774           withoutUnicodePtr = 0;
 775           //start with a new token (even comment written with unicode )
 776
 777           // ---------Consume white space and handles startPosition---------
 778           int whiteStart = currentPosition;
 779           boolean isWhiteSpace;
 780           do {
 781             startPosition = currentPosition;
 782             if (((currentCharacter = source[currentPosition++]) == '\\')
 783               && (source[currentPosition] == 'u')) {
 784               isWhiteSpace = jumpOverUnicodeWhiteSpace();
 785             } else {
 786               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 787                 checkNonExternalizeString();
 788                 if (recordLineSeparator) {
 789                   pushLineSeparator();
 790                 } else {
 791                   currentLine = null;
 792                 }
 793               }
 794               isWhiteSpace =
 795                 (currentCharacter == ' ')
 796                   || Character.isWhitespace(currentCharacter);
 797             }
 798           } while (isWhiteSpace);
 799           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 800             // reposition scanner in case we are interested by spaces as tokens
 801             currentPosition--;
 802             startPosition = whiteStart;
 803             return TokenNameWHITESPACE;
 804           }
 805           //little trick to get out in the middle of a source compuation
 806           if (currentPosition > eofPosition)
 807             return TokenNameEOF;
 808
 809           // ---------Identify the next token-------------
 810
 811           switch (currentCharacter) {
 812             case '(' :
 813               return TokenNameLPAREN;
 814             case ')' :
 815               return TokenNameRPAREN;
 816             case '{' :
 817               return TokenNameLBRACE;
 818             case '}' :
 819               return TokenNameRBRACE;
 820             case '[' :
 821               return TokenNameLBRACKET;
 822             case ']' :
 823               return TokenNameRBRACKET;
 824             case ';' :
 825               return TokenNameSEMICOLON;
 826             case ',' :
 827               return TokenNameCOMMA;
 828
 829             case '.' :
 830               if (getNextCharAsDigit())
 831                 return scanNumber(true);
 832               return TokenNameDOT;
 833             case '+' :
 834               {
 835                 int test;
 836                 if ((test = getNextChar('+', '=')) == 0)
 837                   return TokenNamePLUS_PLUS;
 838                 if (test > 0)
 839                   return TokenNamePLUS_EQUAL;
 840                 return TokenNamePLUS;
 841               }
 842             case '-' :
 843               {
 844                 int test;
 845                 if ((test = getNextChar('-', '=')) == 0)
 846                   return TokenNameMINUS_MINUS;
 847                 if (test > 0)
 848                   return TokenNameMINUS_EQUAL;
 849                 if (getNextChar('>'))
 850                   return TokenNameMINUS_GREATER;
 851
 852                 return TokenNameMINUS;
 853               }
 854             case '~' :
 855               if (getNextChar('='))
 856                 return TokenNameTWIDDLE_EQUAL;
 857               return TokenNameTWIDDLE;
 858             case '!' :
 859               if (getNextChar('='))
 860                 return TokenNameNOT_EQUAL;
 861               return TokenNameNOT;
 862             case '*' :
 863               if (getNextChar('='))
 864                 return TokenNameMULTIPLY_EQUAL;
 865               return TokenNameMULTIPLY;
 866             case '%' :
 867               if (getNextChar('='))
 868                 return TokenNameREMAINDER_EQUAL;
 869               return TokenNameREMAINDER;
 870             case '<' :
 871               {
 872                 int test;
 873                 if ((test = getNextChar('=', '<')) == 0)
 874                   return TokenNameLESS_EQUAL;
 875                 if (test > 0) {
 876                   if (getNextChar('='))
 877                     return TokenNameLEFT_SHIFT_EQUAL;
 878                   if (getNextChar('<')) {
 879                     int heredocStart = currentPosition;
 880                     int heredocLength = 0;
 881                     currentCharacter = source[currentPosition++];
 882                     if (isPHPIdentifierStart(currentCharacter)) {
 883                       currentCharacter = source[currentPosition++];
 884                     } else {
 885                       return TokenNameERROR;
 886                     }
 887                     while (isPHPIdentifierPart(currentCharacter)) {
 888                       currentCharacter = source[currentPosition++];
 889                     }
 890
 891                     heredocLength = currentPosition - heredocStart - 1;
 892
 893                     // heredoc end-tag determination
 894                     boolean endTag = true;
 895                     char ch;
 896                     do {
 897                       ch = source[currentPosition++];
 898                       if (ch == '\r' || ch == '\n') {
 899                         if (recordLineSeparator) {
 900                           pushLineSeparator();
 901                         } else {
 902                           currentLine = null;
 903                         }
 904                         for (int i = 0; i < heredocLength; i++) {
 905                           if (source[currentPosition + i]
 906                             != source[heredocStart + i]) {
 907                             endTag = false;
 908                             break;
 909                           }
 910                         }
 911                         if (endTag) {
 912                           currentPosition += heredocLength - 1;
 913                           currentCharacter = source[currentPosition++];
 914                           break; // do...while loop
 915                         } else {
 916                           endTag = true;
 917                         }
 918                       }
 919
 920                     } while (true);
 921
 922                     return TokenNameHEREDOC;
 923                   }
 924                   return TokenNameLEFT_SHIFT;
 925                 }
 926                 return TokenNameLESS;
 927               }
 928             case '>' :
 929               {
 930                 int test;
 931                 if ((test = getNextChar('=', '>')) == 0)
 932                   return TokenNameGREATER_EQUAL;
 933                 if (test > 0) {
 934                   if ((test = getNextChar('=', '>')) == 0)
 935                     return TokenNameRIGHT_SHIFT_EQUAL;
 936                   return TokenNameRIGHT_SHIFT;
 937                 }
 938                 return TokenNameGREATER;
 939               }
 940             case '=' :
 941               if (getNextChar('='))
 942                 return TokenNameEQUAL_EQUAL;
 943               if (getNextChar('>'))
 944                 return TokenNameEQUAL_GREATER;
 945               return TokenNameEQUAL;
 946             case '&' :
 947               {
 948                 int test;
 949                 if ((test = getNextChar('&', '=')) == 0)
 950                   return TokenNameAND_AND;
 951                 if (test > 0)
 952                   return TokenNameAND_EQUAL;
 953                 return TokenNameAND;
 954               }
 955             case '|' :
 956               {
 957                 int test;
 958                 if ((test = getNextChar('|', '=')) == 0)
 959                   return TokenNameOR_OR;
 960                 if (test > 0)
 961                   return TokenNameOR_EQUAL;
 962                 return TokenNameOR;
 963               }
 964             case '^' :
 965               if (getNextChar('='))
 966                 return TokenNameXOR_EQUAL;
 967               return TokenNameXOR;
 968             case '?' :
 969               if (getNextChar('>')) {
 970                 phpMode = false;
 971                 return TokenNameStopPHP;
 972               }
 973               return TokenNameQUESTION;
 974             case ':' :
 975               if (getNextChar(':'))
 976                 return TokenNameCOLON_COLON;
 977               return TokenNameCOLON;
 978             case '@' :
 979               return TokenNameAT;
 980               //                                        case '\'' :
 981               //                                                {
 982               //                                                        int test;
 983               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 984               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 985               //                                                        }
 986               //                                                        if (test > 0) {
 987               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 988               //                                                                for (int lookAhead = 0;
 989               //                                                                        lookAhead < 3;
 990               //                                                                        lookAhead++) {
 991               //                                                                        if (currentPosition + lookAhead
 992               //                                                                                == source.length)
 993               //                                                                                break;
 994               //                                                                        if (source[currentPosition + lookAhead]
 995               //                                                                                == '\n')
 996               //                                                                                break;
 997               //                                                                        if (source[currentPosition + lookAhead]
 998               //                                                                                == '\'') {
 999               //                                                                                currentPosition += lookAhead + 1;
1000               //                                                                                break;
1001               //                                                                        }
1002               //                                                                }
1003               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1004               //                                                        }
1005               //                                                }
1006               //                                                if (getNextChar('\'')) {
1007               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008               //                                                        for (int lookAhead = 0;
1009               //                                                                lookAhead < 3;
1010               //                                                                lookAhead++) {
1011               //                                                                if (currentPosition + lookAhead
1012               //                                                                        == source.length)
1013               //                                                                        break;
1014               //                                                                if (source[currentPosition + lookAhead]
1015               //                                                                        == '\n')
1016               //                                                                        break;
1017               //                                                                if (source[currentPosition + lookAhead]
1018               //                                                                        == '\'') {
1019               //                                                                        currentPosition += lookAhead + 1;
1020               //                                                                        break;
1021               //                                                                }
1022               //                                                        }
1023               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1024               //                                                }
1025               //                                                if (getNextChar('\\'))
1026               //                                                        scanEscapeCharacter();
1027               //                                                else { // consume next character
1028               //                                                        unicodeAsBackSlash = false;
1029               //                                                        if (((currentCharacter = source[currentPosition++])
1030               //                                                                == '\\')
1031               //                                                                && (source[currentPosition] == 'u')) {
1032               //                                                                getNextUnicodeChar();
1033               //                                                        } else {
1034               //                                                                if (withoutUnicodePtr != 0) {
1035               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1036               //                                                                                currentCharacter;
1037               //                                                                }
1038               //                                                        }
1039               //                                                }
1040               //                                                //            if (getNextChar('\''))
1041               //                                                //              return TokenNameCharacterLiteral;
1042               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1043               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1044               //                                                        if (currentPosition + lookAhead == source.length)
1045               //                                                                break;
1046               //                                                        if (source[currentPosition + lookAhead] == '\n')
1047               //                                                                break;
1048               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1049               //                                                                currentPosition += lookAhead + 1;
1050               //                                                                break;
1051               //                                                        }
1052               //                                                }
1053               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1054             case '\'' :
1055               try {
1056                 // consume next character
1057                 unicodeAsBackSlash = false;
1058                 if (((currentCharacter = source[currentPosition++]) == '\\')
1059                   && (source[currentPosition] == 'u')) {
1060                   getNextUnicodeChar();
1061                 } else {
1062                   if (withoutUnicodePtr != 0) {
1063                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1064                       currentCharacter;
1065                   }
1066                 }
1067
1068                 while (currentCharacter != '\'') {
1069
1070                   /**** in PHP \r and \n are valid in string literals ****/
1071                   //                  if ((currentCharacter == '\n')
1072                   //                    || (currentCharacter == '\r')) {
1073                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1074                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1075                   //                      if (currentPosition + lookAhead == source.length)
1076                   //                        break;
1077                   //                      if (source[currentPosition + lookAhead] == '\n')
1078                   //                        break;
1079                   //                      if (source[currentPosition + lookAhead] == '\"') {
1080                   //                        currentPosition += lookAhead + 1;
1081                   //                        break;
1082                   //                      }
1083                   //                    }
1084                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1085                   //                  }
1086                   if (currentCharacter == '\\') {
1087                     int escapeSize = currentPosition;
1088                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1089                     //scanSingleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1090                     scanSingleQuotedEscapeCharacter();
1091                     escapeSize = currentPosition - escapeSize;
1092                     if (withoutUnicodePtr == 0) {
1093                       //buffer all the entries that have been left aside....
1094                       withoutUnicodePtr =
1095                         currentPosition - escapeSize - 1 - startPosition;
1096                       System.arraycopy(
1097                         source,
1098                         startPosition,
1099                         withoutUnicodeBuffer,
1100                         1,
1101                         withoutUnicodePtr);
1102                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1103                         currentCharacter;
1104                     } else { //overwrite the / in the buffer
1105                       withoutUnicodeBuffer[withoutUnicodePtr] =
1106                         currentCharacter;
1107                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1108                         withoutUnicodePtr--;
1109                       }
1110                     }
1111                   }
1112                   // consume next character
1113                   unicodeAsBackSlash = false;
1114                   if (((currentCharacter = source[currentPosition++]) == '\\')
1115                     && (source[currentPosition] == 'u')) {
1116                     getNextUnicodeChar();
1117                   } else {
1118                     if (withoutUnicodePtr != 0) {
1119                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1120                         currentCharacter;
1121                     }
1122                   }
1123
1124                 }
1125               } catch (IndexOutOfBoundsException e) {
1126                 throw new InvalidInputException(UNTERMINATED_STRING);
1127               } catch (InvalidInputException e) {
1128                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1129                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1130                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1131                     if (currentPosition + lookAhead == source.length)
1132                       break;
1133                     if (source[currentPosition + lookAhead] == '\n')
1134                       break;
1135                     if (source[currentPosition + lookAhead] == '\'') {
1136                       currentPosition += lookAhead + 1;
1137                       break;
1138                     }
1139                   }
1140
1141                 }
1142                 throw e; // rethrow
1143               }
1144               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1145                 if (currentLine == null) {
1146                   currentLine = new NLSLine();
1147                   lines.add(currentLine);
1148                 }
1149                 currentLine.add(
1150                   new StringLiteral(
1151                     getCurrentTokenSourceString(),
1152                     startPosition,
1153                     currentPosition - 1));
1154               }
1155               return TokenNameStringConstant;
1156             case '"' :
1157               try {
1158                 // consume next character
1159                 unicodeAsBackSlash = false;
1160                 if (((currentCharacter = source[currentPosition++]) == '\\')
1161                   && (source[currentPosition] == 'u')) {
1162                   getNextUnicodeChar();
1163                 } else {
1164                   if (withoutUnicodePtr != 0) {
1165                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1166                       currentCharacter;
1167                   }
1168                 }
1169
1170                 while (currentCharacter != '"') {
1171
1172                   /**** in PHP \r and \n are valid in string literals ****/
1173                   //                  if ((currentCharacter == '\n')
1174                   //                    || (currentCharacter == '\r')) {
1175                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1176                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1177                   //                      if (currentPosition + lookAhead == source.length)
1178                   //                        break;
1179                   //                      if (source[currentPosition + lookAhead] == '\n')
1180                   //                        break;
1181                   //                      if (source[currentPosition + lookAhead] == '\"') {
1182                   //                        currentPosition += lookAhead + 1;
1183                   //                        break;
1184                   //                      }
1185                   //                    }
1186                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1187                   //                  }
1188                   if (currentCharacter == '\\') {
1189                     int escapeSize = currentPosition;
1190                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1191                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1192                     scanDoubleQuotedEscapeCharacter();
1193                     escapeSize = currentPosition - escapeSize;
1194                     if (withoutUnicodePtr == 0) {
1195                       //buffer all the entries that have been left aside....
1196                       withoutUnicodePtr =
1197                         currentPosition - escapeSize - 1 - startPosition;
1198                       System.arraycopy(
1199                         source,
1200                         startPosition,
1201                         withoutUnicodeBuffer,
1202                         1,
1203                         withoutUnicodePtr);
1204                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1205                         currentCharacter;
1206                     } else { //overwrite the / in the buffer
1207                       withoutUnicodeBuffer[withoutUnicodePtr] =
1208                         currentCharacter;
1209                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1210                         withoutUnicodePtr--;
1211                       }
1212                     }
1213                   }
1214                   // consume next character
1215                   unicodeAsBackSlash = false;
1216                   if (((currentCharacter = source[currentPosition++]) == '\\')
1217                     && (source[currentPosition] == 'u')) {
1218                     getNextUnicodeChar();
1219                   } else {
1220                     if (withoutUnicodePtr != 0) {
1221                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1222                         currentCharacter;
1223                     }
1224                   }
1225
1226                 }
1227               } catch (IndexOutOfBoundsException e) {
1228                 throw new InvalidInputException(UNTERMINATED_STRING);
1229               } catch (InvalidInputException e) {
1230                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1231                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1232                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1233                     if (currentPosition + lookAhead == source.length)
1234                       break;
1235                     if (source[currentPosition + lookAhead] == '\n')
1236                       break;
1237                     if (source[currentPosition + lookAhead] == '\"') {
1238                       currentPosition += lookAhead + 1;
1239                       break;
1240                     }
1241                   }
1242
1243                 }
1244                 throw e; // rethrow
1245               }
1246               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1247                 if (currentLine == null) {
1248                   currentLine = new NLSLine();
1249                   lines.add(currentLine);
1250                 }
1251                 currentLine.add(
1252                   new StringLiteral(
1253                     getCurrentTokenSourceString(),
1254                     startPosition,
1255                     currentPosition - 1));
1256               }
1257               return TokenNameStringLiteral;
1258             case '`' :
1259               try {
1260                 // consume next character
1261                 unicodeAsBackSlash = false;
1262                 if (((currentCharacter = source[currentPosition++]) == '\\')
1263                   && (source[currentPosition] == 'u')) {
1264                   getNextUnicodeChar();
1265                 } else {
1266                   if (withoutUnicodePtr != 0) {
1267                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1268                       currentCharacter;
1269                   }
1270                 }
1271
1272                 while (currentCharacter != '`') {
1273
1274                   /**** in PHP \r and \n are valid in string literals ****/
1275                   //                if ((currentCharacter == '\n')
1276                   //                  || (currentCharacter == '\r')) {
1277                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1278                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1279                   //                    if (currentPosition + lookAhead == source.length)
1280                   //                      break;
1281                   //                    if (source[currentPosition + lookAhead] == '\n')
1282                   //                      break;
1283                   //                    if (source[currentPosition + lookAhead] == '\"') {
1284                   //                      currentPosition += lookAhead + 1;
1285                   //                      break;
1286                   //                    }
1287                   //                  }
1288                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1289                   //                }
1290                   if (currentCharacter == '\\') {
1291                     int escapeSize = currentPosition;
1292                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1293                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1294                     scanDoubleQuotedEscapeCharacter();
1295                     escapeSize = currentPosition - escapeSize;
1296                     if (withoutUnicodePtr == 0) {
1297                       //buffer all the entries that have been left aside....
1298                       withoutUnicodePtr =
1299                         currentPosition - escapeSize - 1 - startPosition;
1300                       System.arraycopy(
1301                         source,
1302                         startPosition,
1303                         withoutUnicodeBuffer,
1304                         1,
1305                         withoutUnicodePtr);
1306                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1307                         currentCharacter;
1308                     } else { //overwrite the / in the buffer
1309                       withoutUnicodeBuffer[withoutUnicodePtr] =
1310                         currentCharacter;
1311                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1312                         withoutUnicodePtr--;
1313                       }
1314                     }
1315                   }
1316                   // consume next character
1317                   unicodeAsBackSlash = false;
1318                   if (((currentCharacter = source[currentPosition++]) == '\\')
1319                     && (source[currentPosition] == 'u')) {
1320                     getNextUnicodeChar();
1321                   } else {
1322                     if (withoutUnicodePtr != 0) {
1323                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1324                         currentCharacter;
1325                     }
1326                   }
1327
1328                 }
1329               } catch (IndexOutOfBoundsException e) {
1330                 throw new InvalidInputException(UNTERMINATED_STRING);
1331               } catch (InvalidInputException e) {
1332                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1333                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1334                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1335                     if (currentPosition + lookAhead == source.length)
1336                       break;
1337                     if (source[currentPosition + lookAhead] == '\n')
1338                       break;
1339                     if (source[currentPosition + lookAhead] == '`') {
1340                       currentPosition += lookAhead + 1;
1341                       break;
1342                     }
1343                   }
1344
1345                 }
1346                 throw e; // rethrow
1347               }
1348               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1349                 if (currentLine == null) {
1350                   currentLine = new NLSLine();
1351                   lines.add(currentLine);
1352                 }
1353                 currentLine.add(
1354                   new StringLiteral(
1355                     getCurrentTokenSourceString(),
1356                     startPosition,
1357                     currentPosition - 1));
1358               }
1359               return TokenNameStringInterpolated;
1360             case '#' :
1361             case '/' :
1362               {
1363                 int test;
1364                 if ((currentCharacter == '#')
1365                   || (test = getNextChar('/', '*')) == 0) {
1366                   //line comment
1367                   int endPositionForLineComment = 0;
1368                   try { //get the next char
1369                     if (((currentCharacter = source[currentPosition++])
1370                       == '\\')
1371                       && (source[currentPosition] == 'u')) {
1372                       //-------------unicode traitement ------------
1373                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1374                       currentPosition++;
1375                       while (source[currentPosition] == 'u') {
1376                         currentPosition++;
1377                       }
1378                       if ((c1 =
1379                         Character.getNumericValue(source[currentPosition++]))
1380                         > 15
1381                         || c1 < 0
1382                         || (c2 =
1383                           Character.getNumericValue(source[currentPosition++]))
1384                           > 15
1385                         || c2 < 0
1386                         || (c3 =
1387                           Character.getNumericValue(source[currentPosition++]))
1388                           > 15
1389                         || c3 < 0
1390                         || (c4 =
1391                           Character.getNumericValue(source[currentPosition++]))
1392                           > 15
1393                         || c4 < 0) {
1394                         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1395                       } else {
1396                         currentCharacter =
1397                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1398                       }
1399                     }
1400
1401                     //handle the \\u case manually into comment
1402                     if (currentCharacter == '\\') {
1403                       if (source[currentPosition] == '\\')
1404                         currentPosition++;
1405                     } //jump over the \\
1406                     boolean isUnicode = false;
1407                     while (currentCharacter != '\r'
1408                       && currentCharacter != '\n') {
1409                       if (currentCharacter == '?') {
1410                         if (getNextChar('>')) {
1411                           startPosition = currentPosition - 2;
1412                           phpMode = false;
1413                           return TokenNameStopPHP;
1414                         }
1415                       }
1416
1417                       //get the next char
1418                       isUnicode = false;
1419                       if (((currentCharacter = source[currentPosition++])
1420                         == '\\')
1421                         && (source[currentPosition] == 'u')) {
1422                         isUnicode = true;
1423                         //-------------unicode traitement ------------
1424                         int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1425                         currentPosition++;
1426                         while (source[currentPosition] == 'u') {
1427                           currentPosition++;
1428                         }
1429                         if ((c1 =
1430                           Character.getNumericValue(source[currentPosition++]))
1431                           > 15
1432                           || c1 < 0
1433                           || (c2 =
1434                             Character.getNumericValue(
1435                               source[currentPosition++]))
1436                             > 15
1437                           || c2 < 0
1438                           || (c3 =
1439                             Character.getNumericValue(
1440                               source[currentPosition++]))
1441                             > 15
1442                           || c3 < 0
1443                           || (c4 =
1444                             Character.getNumericValue(
1445                               source[currentPosition++]))
1446                             > 15
1447                           || c4 < 0) {
1448                           throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1449                         } else {
1450                           currentCharacter =
1451                             (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1452                         }
1453                       }
1454                       //handle the \\u case manually into comment
1455                       if (currentCharacter == '\\') {
1456                         if (source[currentPosition] == '\\')
1457                           currentPosition++;
1458                       } //jump over the \\
1459                     }
1460                     if (isUnicode) {
1461                       endPositionForLineComment = currentPosition - 6;
1462                     } else {
1463                       endPositionForLineComment = currentPosition - 1;
1464                     }
1465                     recordComment(false);
1466                     if ((currentCharacter == '\r')
1467                       || (currentCharacter == '\n')) {
1468                       checkNonExternalizeString();
1469                       if (recordLineSeparator) {
1470                         if (isUnicode) {
1471                           pushUnicodeLineSeparator();
1472                         } else {
1473                           pushLineSeparator();
1474                         }
1475                       } else {
1476                         currentLine = null;
1477                       }
1478                     }
1479                     if (tokenizeComments) {
1480                       if (!isUnicode) {
1481                         currentPosition = endPositionForLineComment;
1482                         // reset one character behind
1483                       }
1484                       return TokenNameCOMMENT_LINE;
1485                     }
1486                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1487                     if (tokenizeComments) {
1488                       currentPosition--;
1489                       // reset one character behind
1490                       return TokenNameCOMMENT_LINE;
1491                     }
1492                   }
1493                   break;
1494                 }
1495                 if (test > 0) {
1496                   //traditional and annotation comment
1497                   boolean isJavadoc = false, star = false;
1498                   // consume next character
1499                   unicodeAsBackSlash = false;
1500                   if (((currentCharacter = source[currentPosition++]) == '\\')
1501                     && (source[currentPosition] == 'u')) {
1502                     getNextUnicodeChar();
1503                   } else {
1504                     if (withoutUnicodePtr != 0) {
1505                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1506                         currentCharacter;
1507                     }
1508                   }
1509
1510                   if (currentCharacter == '*') {
1511                     isJavadoc = true;
1512                     star = true;
1513                   }
1514                   if ((currentCharacter == '\r')
1515                     || (currentCharacter == '\n')) {
1516                     checkNonExternalizeString();
1517                     if (recordLineSeparator) {
1518                       pushLineSeparator();
1519                     } else {
1520                       currentLine = null;
1521                     }
1522                   }
1523                   try { //get the next char
1524                     if (((currentCharacter = source[currentPosition++])
1525                       == '\\')
1526                       && (source[currentPosition] == 'u')) {
1527                       //-------------unicode traitement ------------
1528                       getNextUnicodeChar();
1529                     }
1530                     //handle the \\u case manually into comment
1531                     if (currentCharacter == '\\') {
1532                       if (source[currentPosition] == '\\')
1533                         currentPosition++;
1534                       //jump over the \\
1535                     }
1536                     // empty comment is not a javadoc /**/
1537                     if (currentCharacter == '/') {
1538                       isJavadoc = false;
1539                     }
1540                     //loop until end of comment */
1541                     while ((currentCharacter != '/') || (!star)) {
1542                       if ((currentCharacter == '\r')
1543                         || (currentCharacter == '\n')) {
1544                         checkNonExternalizeString();
1545                         if (recordLineSeparator) {
1546                           pushLineSeparator();
1547                         } else {
1548                           currentLine = null;
1549                         }
1550                       }
1551                       star = currentCharacter == '*';
1552                       //get next char
1553                       if (((currentCharacter = source[currentPosition++])
1554                         == '\\')
1555                         && (source[currentPosition] == 'u')) {
1556                         //-------------unicode traitement ------------
1557                         getNextUnicodeChar();
1558                       }
1559                       //handle the \\u case manually into comment
1560                       if (currentCharacter == '\\') {
1561                         if (source[currentPosition] == '\\')
1562                           currentPosition++;
1563                       } //jump over the \\
1564                     }
1565                     recordComment(isJavadoc);
1566                     if (tokenizeComments) {
1567                       if (isJavadoc)
1568                         return TokenNameCOMMENT_PHPDOC;
1569                       return TokenNameCOMMENT_BLOCK;
1570                     }
1571                   } catch (IndexOutOfBoundsException e) {
1572                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1573                   }
1574                   break;
1575                 }
1576                 if (getNextChar('='))
1577                   return TokenNameDIVIDE_EQUAL;
1578                 return TokenNameDIVIDE;
1579               }
1580             case '\u001a' :
1581               if (atEnd())
1582                 return TokenNameEOF;
1583               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1584               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1585
1586             default :
1587               if (currentCharacter == '$') {
1588                 while ( (currentCharacter = source[currentPosition++])=='$') {
1589                 }
1590                 if (currentCharacter == '{')
1591                   return TokenNameDOLLAR_LBRACE;
1592                 if (isPHPIdentifierStart(currentCharacter))
1593                   return scanIdentifierOrKeyword(true);
1594                 return TokenNameERROR;
1595               }
1596               if (isPHPIdentifierStart(currentCharacter))
1597                 return scanIdentifierOrKeyword(false);
1598               if (Character.isDigit(currentCharacter))
1599                 return scanNumber(false);
1600               return TokenNameERROR;
1601           }
1602         }
1603       } //-----------------end switch while try--------------------
1604       catch (IndexOutOfBoundsException e) {
1605       }
1606     }
1607     return TokenNameEOF;
1608   }
1609
1610   public final void getNextUnicodeChar()
1611     throws IndexOutOfBoundsException, InvalidInputException {
1612     //VOID
1613     //handle the case of unicode.
1614     //when a unicode appears then we must use a buffer that holds char internal values
1615     //At the end of this method currentCharacter holds the new visited char
1616     //and currentPosition points right next after it
1617
1618     //ALL getNextChar.... ARE OPTIMIZED COPIES
1619
1620     int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1621     currentPosition++;
1622     while (source[currentPosition] == 'u') {
1623       currentPosition++;
1624       unicodeSize++;
1625     }
1626
1627     if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1628       || c1 < 0
1629       || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1630       || c2 < 0
1631       || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1632       || c3 < 0
1633       || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1634       || c4 < 0) {
1635       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1636     } else {
1637       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1638       //need the unicode buffer
1639       if (withoutUnicodePtr == 0) {
1640         //buffer all the entries that have been left aside....
1641         withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1642         System.arraycopy(
1643           source,
1644           startPosition,
1645           withoutUnicodeBuffer,
1646           1,
1647           withoutUnicodePtr);
1648       }
1649       //fill the buffer with the char
1650       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1651     }
1652     unicodeAsBackSlash = currentCharacter == '\\';
1653   }
1654   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1655    */
1656   public final void jumpOverMethodBody() {
1657
1658     this.wasAcr = false;
1659     int found = 1;
1660     try {
1661       while (true) { //loop for jumping over comments
1662         // ---------Consume white space and handles startPosition---------
1663         boolean isWhiteSpace;
1664         do {
1665           startPosition = currentPosition;
1666           if (((currentCharacter = source[currentPosition++]) == '\\')
1667             && (source[currentPosition] == 'u')) {
1668             isWhiteSpace = jumpOverUnicodeWhiteSpace();
1669           } else {
1670             if (recordLineSeparator
1671               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1672               pushLineSeparator();
1673             isWhiteSpace = Character.isWhitespace(currentCharacter);
1674           }
1675         } while (isWhiteSpace);
1676
1677         // -------consume token until } is found---------
1678         switch (currentCharacter) {
1679           case '{' :
1680             found++;
1681             break;
1682           case '}' :
1683             found--;
1684             if (found == 0)
1685               return;
1686             break;
1687           case '\'' :
1688             {
1689               boolean test;
1690               test = getNextChar('\\');
1691               if (test) {
1692                 try {
1693                   scanDoubleQuotedEscapeCharacter();
1694                 } catch (InvalidInputException ex) {
1695                 };
1696               } else {
1697                 try { // consume next character
1698                   unicodeAsBackSlash = false;
1699                   if (((currentCharacter = source[currentPosition++]) == '\\')
1700                     && (source[currentPosition] == 'u')) {
1701                     getNextUnicodeChar();
1702                   } else {
1703                     if (withoutUnicodePtr != 0) {
1704                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1705                         currentCharacter;
1706                     }
1707                   }
1708                 } catch (InvalidInputException ex) {
1709                 };
1710               }
1711               getNextChar('\'');
1712               break;
1713             }
1714           case '"' :
1715             try {
1716               try { // consume next character
1717                 unicodeAsBackSlash = false;
1718                 if (((currentCharacter = source[currentPosition++]) == '\\')
1719                   && (source[currentPosition] == 'u')) {
1720                   getNextUnicodeChar();
1721                 } else {
1722                   if (withoutUnicodePtr != 0) {
1723                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1724                       currentCharacter;
1725                   }
1726                 }
1727               } catch (InvalidInputException ex) {
1728               };
1729               while (currentCharacter != '"') {
1730                 if (currentCharacter == '\r') {
1731                   if (source[currentPosition] == '\n')
1732                     currentPosition++;
1733                   break;
1734                   // the string cannot go further that the line
1735                 }
1736                 if (currentCharacter == '\n') {
1737                   break;
1738                   // the string cannot go further that the line
1739                 }
1740                 if (currentCharacter == '\\') {
1741                   try {
1742                     scanDoubleQuotedEscapeCharacter();
1743                   } catch (InvalidInputException ex) {
1744                   };
1745                 }
1746                 try { // consume next character
1747                   unicodeAsBackSlash = false;
1748                   if (((currentCharacter = source[currentPosition++]) == '\\')
1749                     && (source[currentPosition] == 'u')) {
1750                     getNextUnicodeChar();
1751                   } else {
1752                     if (withoutUnicodePtr != 0) {
1753                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1754                         currentCharacter;
1755                     }
1756                   }
1757                 } catch (InvalidInputException ex) {
1758                 };
1759               }
1760             } catch (IndexOutOfBoundsException e) {
1761               return;
1762             }
1763             break;
1764           case '/' :
1765             {
1766               int test;
1767               if ((test = getNextChar('/', '*')) == 0) {
1768                 //line comment
1769                 try {
1770                   //get the next char
1771                   if (((currentCharacter = source[currentPosition++]) == '\\')
1772                     && (source[currentPosition] == 'u')) {
1773                     //-------------unicode traitement ------------
1774                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1775                     currentPosition++;
1776                     while (source[currentPosition] == 'u') {
1777                       currentPosition++;
1778                     }
1779                     if ((c1 =
1780                       Character.getNumericValue(source[currentPosition++]))
1781                       > 15
1782                       || c1 < 0
1783                       || (c2 =
1784                         Character.getNumericValue(source[currentPosition++]))
1785                         > 15
1786                       || c2 < 0
1787                       || (c3 =
1788                         Character.getNumericValue(source[currentPosition++]))
1789                         > 15
1790                       || c3 < 0
1791                       || (c4 =
1792                         Character.getNumericValue(source[currentPosition++]))
1793                         > 15
1794                       || c4 < 0) {
1795                       //error don't care of the value
1796                       currentCharacter = 'A';
1797                     } //something different from \n and \r
1798                     else {
1799                       currentCharacter =
1800                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1801                     }
1802                   }
1803
1804                   while (currentCharacter != '\r'
1805                     && currentCharacter != '\n') {
1806                     //get the next char
1807                     if (((currentCharacter = source[currentPosition++])
1808                       == '\\')
1809                       && (source[currentPosition] == 'u')) {
1810                       //-------------unicode traitement ------------
1811                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1812                       currentPosition++;
1813                       while (source[currentPosition] == 'u') {
1814                         currentPosition++;
1815                       }
1816                       if ((c1 =
1817                         Character.getNumericValue(source[currentPosition++]))
1818                         > 15
1819                         || c1 < 0
1820                         || (c2 =
1821                           Character.getNumericValue(source[currentPosition++]))
1822                           > 15
1823                         || c2 < 0
1824                         || (c3 =
1825                           Character.getNumericValue(source[currentPosition++]))
1826                           > 15
1827                         || c3 < 0
1828                         || (c4 =
1829                           Character.getNumericValue(source[currentPosition++]))
1830                           > 15
1831                         || c4 < 0) {
1832                         //error don't care of the value
1833                         currentCharacter = 'A';
1834                       } //something different from \n and \r
1835                       else {
1836                         currentCharacter =
1837                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1838                       }
1839                     }
1840                   }
1841                   if (recordLineSeparator
1842                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1843                     pushLineSeparator();
1844                 } catch (IndexOutOfBoundsException e) {
1845                 } //an eof will them be generated
1846                 break;
1847               }
1848               if (test > 0) {
1849                 //traditional and annotation comment
1850                 boolean star = false;
1851                 try { // consume next character
1852                   unicodeAsBackSlash = false;
1853                   if (((currentCharacter = source[currentPosition++]) == '\\')
1854                     && (source[currentPosition] == 'u')) {
1855                     getNextUnicodeChar();
1856                   } else {
1857                     if (withoutUnicodePtr != 0) {
1858                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1859                         currentCharacter;
1860                     }
1861                   };
1862                 } catch (InvalidInputException ex) {
1863                 };
1864                 if (currentCharacter == '*') {
1865                   star = true;
1866                 }
1867                 if (recordLineSeparator
1868                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1869                   pushLineSeparator();
1870                 try { //get the next char
1871                   if (((currentCharacter = source[currentPosition++]) == '\\')
1872                     && (source[currentPosition] == 'u')) {
1873                     //-------------unicode traitement ------------
1874                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1875                     currentPosition++;
1876                     while (source[currentPosition] == 'u') {
1877                       currentPosition++;
1878                     }
1879                     if ((c1 =
1880                       Character.getNumericValue(source[currentPosition++]))
1881                       > 15
1882                       || c1 < 0
1883                       || (c2 =
1884                         Character.getNumericValue(source[currentPosition++]))
1885                         > 15
1886                       || c2 < 0
1887                       || (c3 =
1888                         Character.getNumericValue(source[currentPosition++]))
1889                         > 15
1890                       || c3 < 0
1891                       || (c4 =
1892                         Character.getNumericValue(source[currentPosition++]))
1893                         > 15
1894                       || c4 < 0) {
1895                       //error don't care of the value
1896                       currentCharacter = 'A';
1897                     } //something different from * and /
1898                     else {
1899                       currentCharacter =
1900                         (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1901                     }
1902                   }
1903                   //loop until end of comment */
1904                   while ((currentCharacter != '/') || (!star)) {
1905                     if (recordLineSeparator
1906                       && ((currentCharacter == '\r')
1907                         || (currentCharacter == '\n')))
1908                       pushLineSeparator();
1909                     star = currentCharacter == '*';
1910                     //get next char
1911                     if (((currentCharacter = source[currentPosition++])
1912                       == '\\')
1913                       && (source[currentPosition] == 'u')) {
1914                       //-------------unicode traitement ------------
1915                       int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1916                       currentPosition++;
1917                       while (source[currentPosition] == 'u') {
1918                         currentPosition++;
1919                       }
1920                       if ((c1 =
1921                         Character.getNumericValue(source[currentPosition++]))
1922                         > 15
1923                         || c1 < 0
1924                         || (c2 =
1925                           Character.getNumericValue(source[currentPosition++]))
1926                           > 15
1927                         || c2 < 0
1928                         || (c3 =
1929                           Character.getNumericValue(source[currentPosition++]))
1930                           > 15
1931                         || c3 < 0
1932                         || (c4 =
1933                           Character.getNumericValue(source[currentPosition++]))
1934                           > 15
1935                         || c4 < 0) {
1936                         //error don't care of the value
1937                         currentCharacter = 'A';
1938                       } //something different from * and /
1939                       else {
1940                         currentCharacter =
1941                           (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1942                       }
1943                     }
1944                   }
1945                 } catch (IndexOutOfBoundsException e) {
1946                   return;
1947                 }
1948                 break;
1949               }
1950               break;
1951             }
1952
1953           default :
1954             if (isPHPIdentifierStart(currentCharacter)
1955               || currentCharacter == '$') {
1956               try {
1957                 scanIdentifierOrKeyword((currentCharacter == '$'));
1958               } catch (InvalidInputException ex) {
1959               };
1960               break;
1961             }
1962             if (Character.isDigit(currentCharacter)) {
1963               try {
1964                 scanNumber(false);
1965               } catch (InvalidInputException ex) {
1966               };
1967               break;
1968             }
1969         }
1970       }
1971       //-----------------end switch while try--------------------
1972     } catch (IndexOutOfBoundsException e) {
1973     } catch (InvalidInputException e) {
1974     }
1975     return;
1976   }
1977   public final boolean jumpOverUnicodeWhiteSpace()
1978     throws InvalidInputException {
1979     //BOOLEAN
1980     //handle the case of unicode. Jump over the next whiteSpace
1981     //making startPosition pointing on the next available char
1982     //On false, the currentCharacter is filled up with a potential
1983     //correct char
1984
1985     try {
1986       this.wasAcr = false;
1987       int c1, c2, c3, c4;
1988       int unicodeSize = 6;
1989       currentPosition++;
1990       while (source[currentPosition] == 'u') {
1991         currentPosition++;
1992         unicodeSize++;
1993       }
1994
1995       if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1996         || c1 < 0)
1997         || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1998           || c2 < 0)
1999         || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2000           || c3 < 0)
2001         || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2002           || c4 < 0)) {
2003         throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2004       }
2005
2006       currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2007       if (recordLineSeparator
2008         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2009         pushLineSeparator();
2010       if (Character.isWhitespace(currentCharacter))
2011         return true;
2012
2013       //buffer the new char which is not a white space
2014       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2015       //withoutUnicodePtr == 1 is true here
2016       return false;
2017     } catch (IndexOutOfBoundsException e) {
2018       throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2019     }
2020   }
2021   public final int[] getLineEnds() {
2022     //return a bounded copy of this.lineEnds
2023
2024     int[] copy;
2025     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2026     return copy;
2027   }
2028
2029   public char[] getSource() {
2030     return this.source;
2031   }
2032   final char[] optimizedCurrentTokenSource1() {
2033     //return always the same char[] build only once
2034
2035     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2036     char charOne = source[startPosition];
2037     switch (charOne) {
2038       case 'a' :
2039         return charArray_a;
2040       case 'b' :
2041         return charArray_b;
2042       case 'c' :
2043         return charArray_c;
2044       case 'd' :
2045         return charArray_d;
2046       case 'e' :
2047         return charArray_e;
2048       case 'f' :
2049         return charArray_f;
2050       case 'g' :
2051         return charArray_g;
2052       case 'h' :
2053         return charArray_h;
2054       case 'i' :
2055         return charArray_i;
2056       case 'j' :
2057         return charArray_j;
2058       case 'k' :
2059         return charArray_k;
2060       case 'l' :
2061         return charArray_l;
2062       case 'm' :
2063         return charArray_m;
2064       case 'n' :
2065         return charArray_n;
2066       case 'o' :
2067         return charArray_o;
2068       case 'p' :
2069         return charArray_p;
2070       case 'q' :
2071         return charArray_q;
2072       case 'r' :
2073         return charArray_r;
2074       case 's' :
2075         return charArray_s;
2076       case 't' :
2077         return charArray_t;
2078       case 'u' :
2079         return charArray_u;
2080       case 'v' :
2081         return charArray_v;
2082       case 'w' :
2083         return charArray_w;
2084       case 'x' :
2085         return charArray_x;
2086       case 'y' :
2087         return charArray_y;
2088       case 'z' :
2089         return charArray_z;
2090       default :
2091         return new char[] { charOne };
2092     }
2093   }
2094   final char[] optimizedCurrentTokenSource2() {
2095     //try to return the same char[] build only once
2096
2097     char c0, c1;
2098     int hash =
2099       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2100         % TableSize;
2101     char[][] table = charArray_length[0][hash];
2102     int i = newEntry2;
2103     while (++i < InternalTableSize) {
2104       char[] charArray = table[i];
2105       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2106         return charArray;
2107     }
2108     //---------other side---------
2109     i = -1;
2110     int max = newEntry2;
2111     while (++i <= max) {
2112       char[] charArray = table[i];
2113       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2114         return charArray;
2115     }
2116     //--------add the entry-------
2117     if (++max >= InternalTableSize)
2118       max = 0;
2119     char[] r;
2120     table[max] = (r = new char[] { c0, c1 });
2121     newEntry2 = max;
2122     return r;
2123   }
2124   final char[] optimizedCurrentTokenSource3() {
2125     //try to return the same char[] build only once
2126
2127     char c0, c1, c2;
2128     int hash =
2129       (((c0 = source[startPosition]) << 12)
2130         + ((c1 = source[startPosition + 1]) << 6)
2131         + (c2 = source[startPosition + 2]))
2132         % TableSize;
2133     char[][] table = charArray_length[1][hash];
2134     int i = newEntry3;
2135     while (++i < InternalTableSize) {
2136       char[] charArray = table[i];
2137       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2138         return charArray;
2139     }
2140     //---------other side---------
2141     i = -1;
2142     int max = newEntry3;
2143     while (++i <= max) {
2144       char[] charArray = table[i];
2145       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2146         return charArray;
2147     }
2148     //--------add the entry-------
2149     if (++max >= InternalTableSize)
2150       max = 0;
2151     char[] r;
2152     table[max] = (r = new char[] { c0, c1, c2 });
2153     newEntry3 = max;
2154     return r;
2155   }
2156   final char[] optimizedCurrentTokenSource4() {
2157     //try to return the same char[] build only once
2158
2159     char c0, c1, c2, c3;
2160     long hash =
2161       ((((long) (c0 = source[startPosition])) << 18)
2162         + ((c1 = source[startPosition + 1]) << 12)
2163         + ((c2 = source[startPosition + 2]) << 6)
2164         + (c3 = source[startPosition + 3]))
2165         % TableSize;
2166     char[][] table = charArray_length[2][(int) hash];
2167     int i = newEntry4;
2168     while (++i < InternalTableSize) {
2169       char[] charArray = table[i];
2170       if ((c0 == charArray[0])
2171         && (c1 == charArray[1])
2172         && (c2 == charArray[2])
2173         && (c3 == charArray[3]))
2174         return charArray;
2175     }
2176     //---------other side---------
2177     i = -1;
2178     int max = newEntry4;
2179     while (++i <= max) {
2180       char[] charArray = table[i];
2181       if ((c0 == charArray[0])
2182         && (c1 == charArray[1])
2183         && (c2 == charArray[2])
2184         && (c3 == charArray[3]))
2185         return charArray;
2186     }
2187     //--------add the entry-------
2188     if (++max >= InternalTableSize)
2189       max = 0;
2190     char[] r;
2191     table[max] = (r = new char[] { c0, c1, c2, c3 });
2192     newEntry4 = max;
2193     return r;
2194
2195   }
2196   final char[] optimizedCurrentTokenSource5() {
2197     //try to return the same char[] build only once
2198
2199     char c0, c1, c2, c3, c4;
2200     long hash =
2201       ((((long) (c0 = source[startPosition])) << 24)
2202         + (((long) (c1 = source[startPosition + 1])) << 18)
2203         + ((c2 = source[startPosition + 2]) << 12)
2204         + ((c3 = source[startPosition + 3]) << 6)
2205         + (c4 = source[startPosition + 4]))
2206         % TableSize;
2207     char[][] table = charArray_length[3][(int) hash];
2208     int i = newEntry5;
2209     while (++i < InternalTableSize) {
2210       char[] charArray = table[i];
2211       if ((c0 == charArray[0])
2212         && (c1 == charArray[1])
2213         && (c2 == charArray[2])
2214         && (c3 == charArray[3])
2215         && (c4 == charArray[4]))
2216         return charArray;
2217     }
2218     //---------other side---------
2219     i = -1;
2220     int max = newEntry5;
2221     while (++i <= max) {
2222       char[] charArray = table[i];
2223       if ((c0 == charArray[0])
2224         && (c1 == charArray[1])
2225         && (c2 == charArray[2])
2226         && (c3 == charArray[3])
2227         && (c4 == charArray[4]))
2228         return charArray;
2229     }
2230     //--------add the entry-------
2231     if (++max >= InternalTableSize)
2232       max = 0;
2233     char[] r;
2234     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2235     newEntry5 = max;
2236     return r;
2237
2238   }
2239   final char[] optimizedCurrentTokenSource6() {
2240     //try to return the same char[] build only once
2241
2242     char c0, c1, c2, c3, c4, c5;
2243     long hash =
2244       ((((long) (c0 = source[startPosition])) << 32)
2245         + (((long) (c1 = source[startPosition + 1])) << 24)
2246         + (((long) (c2 = source[startPosition + 2])) << 18)
2247         + ((c3 = source[startPosition + 3]) << 12)
2248         + ((c4 = source[startPosition + 4]) << 6)
2249         + (c5 = source[startPosition + 5]))
2250         % TableSize;
2251     char[][] table = charArray_length[4][(int) hash];
2252     int i = newEntry6;
2253     while (++i < InternalTableSize) {
2254       char[] charArray = table[i];
2255       if ((c0 == charArray[0])
2256         && (c1 == charArray[1])
2257         && (c2 == charArray[2])
2258         && (c3 == charArray[3])
2259         && (c4 == charArray[4])
2260         && (c5 == charArray[5]))
2261         return charArray;
2262     }
2263     //---------other side---------
2264     i = -1;
2265     int max = newEntry6;
2266     while (++i <= max) {
2267       char[] charArray = table[i];
2268       if ((c0 == charArray[0])
2269         && (c1 == charArray[1])
2270         && (c2 == charArray[2])
2271         && (c3 == charArray[3])
2272         && (c4 == charArray[4])
2273         && (c5 == charArray[5]))
2274         return charArray;
2275     }
2276     //--------add the entry-------
2277     if (++max >= InternalTableSize)
2278       max = 0;
2279     char[] r;
2280     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2281     newEntry6 = max;
2282     return r;
2283   }
2284   public final void pushLineSeparator() throws InvalidInputException {
2285     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2286     final int INCREMENT = 250;
2287
2288     if (this.checkNonExternalizedStringLiterals) {
2289       // reinitialize the current line for non externalize strings purpose
2290       currentLine = null;
2291     }
2292     //currentCharacter is at position currentPosition-1
2293
2294     // cr 000D
2295     if (currentCharacter == '\r') {
2296       int separatorPos = currentPosition - 1;
2297       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2298         return;
2299       //System.out.println("CR-" + separatorPos);
2300       try {
2301         lineEnds[++linePtr] = separatorPos;
2302       } catch (IndexOutOfBoundsException e) {
2303         //linePtr value is correct
2304         int oldLength = lineEnds.length;
2305         int[] old = lineEnds;
2306         lineEnds = new int[oldLength + INCREMENT];
2307         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2308         lineEnds[linePtr] = separatorPos;
2309       }
2310       // look-ahead for merged cr+lf
2311       try {
2312         if (source[currentPosition] == '\n') {
2313           //System.out.println("look-ahead LF-" + currentPosition);
2314           lineEnds[linePtr] = currentPosition;
2315           currentPosition++;
2316           wasAcr = false;
2317         } else {
2318           wasAcr = true;
2319         }
2320       } catch (IndexOutOfBoundsException e) {
2321         wasAcr = true;
2322       }
2323     } else {
2324       // lf 000A
2325       if (currentCharacter == '\n') {
2326         //must merge eventual cr followed by lf
2327         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2328           //System.out.println("merge LF-" + (currentPosition - 1));
2329           lineEnds[linePtr] = currentPosition - 1;
2330         } else {
2331           int separatorPos = currentPosition - 1;
2332           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2333             return;
2334           // System.out.println("LF-" + separatorPos);
2335           try {
2336             lineEnds[++linePtr] = separatorPos;
2337           } catch (IndexOutOfBoundsException e) {
2338             //linePtr value is correct
2339             int oldLength = lineEnds.length;
2340             int[] old = lineEnds;
2341             lineEnds = new int[oldLength + INCREMENT];
2342             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2343             lineEnds[linePtr] = separatorPos;
2344           }
2345         }
2346         wasAcr = false;
2347       }
2348     }
2349   }
2350   public final void pushUnicodeLineSeparator() {
2351     // isUnicode means that the \r or \n has been read as a unicode character
2352
2353     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2354
2355     final int INCREMENT = 250;
2356     //currentCharacter is at position currentPosition-1
2357
2358     if (this.checkNonExternalizedStringLiterals) {
2359       // reinitialize the current line for non externalize strings purpose
2360       currentLine = null;
2361     }
2362
2363     // cr 000D
2364     if (currentCharacter == '\r') {
2365       int separatorPos = currentPosition - 6;
2366       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2367         return;
2368       //System.out.println("CR-" + separatorPos);
2369       try {
2370         lineEnds[++linePtr] = separatorPos;
2371       } catch (IndexOutOfBoundsException e) {
2372         //linePtr value is correct
2373         int oldLength = lineEnds.length;
2374         int[] old = lineEnds;
2375         lineEnds = new int[oldLength + INCREMENT];
2376         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2377         lineEnds[linePtr] = separatorPos;
2378       }
2379       // look-ahead for merged cr+lf
2380       if (source[currentPosition] == '\n') {
2381         //System.out.println("look-ahead LF-" + currentPosition);
2382         lineEnds[linePtr] = currentPosition;
2383         currentPosition++;
2384         wasAcr = false;
2385       } else {
2386         wasAcr = true;
2387       }
2388     } else {
2389       // lf 000A
2390       if (currentCharacter == '\n') {
2391         //must merge eventual cr followed by lf
2392         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2393           //System.out.println("merge LF-" + (currentPosition - 1));
2394           lineEnds[linePtr] = currentPosition - 6;
2395         } else {
2396           int separatorPos = currentPosition - 6;
2397           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2398             return;
2399           // System.out.println("LF-" + separatorPos);
2400           try {
2401             lineEnds[++linePtr] = separatorPos;
2402           } catch (IndexOutOfBoundsException e) {
2403             //linePtr value is correct
2404             int oldLength = lineEnds.length;
2405             int[] old = lineEnds;
2406             lineEnds = new int[oldLength + INCREMENT];
2407             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2408             lineEnds[linePtr] = separatorPos;
2409           }
2410         }
2411         wasAcr = false;
2412       }
2413     }
2414   }
2415   public final void recordComment(boolean isJavadoc) {
2416
2417     // a new annotation comment is recorded
2418     try {
2419       commentStops[++commentPtr] =
2420         isJavadoc ? currentPosition : -currentPosition;
2421     } catch (IndexOutOfBoundsException e) {
2422       int oldStackLength = commentStops.length;
2423       int[] oldStack = commentStops;
2424       commentStops = new int[oldStackLength + 30];
2425       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2426       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2427       //grows the positions buffers too
2428       int[] old = commentStarts;
2429       commentStarts = new int[oldStackLength + 30];
2430       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2431     }
2432
2433     //the buffer is of a correct size here
2434     commentStarts[commentPtr] = startPosition;
2435   }
2436   public void resetTo(int begin, int end) {
2437     //reset the scanner to a given position where it may rescan again
2438
2439     diet = false;
2440     initialPosition = startPosition = currentPosition = begin;
2441     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2442     commentPtr = -1; // reset comment stack
2443   }
2444
2445   public final void scanSingleQuotedEscapeCharacter()
2446     throws InvalidInputException {
2447     // the string with "\\u" is a legal string of two chars \ and u
2448     //thus we use a direct access to the source (for regular cases).
2449
2450     if (unicodeAsBackSlash) {
2451       // consume next character
2452       unicodeAsBackSlash = false;
2453       if (((currentCharacter = source[currentPosition++]) == '\\')
2454         && (source[currentPosition] == 'u')) {
2455         getNextUnicodeChar();
2456       } else {
2457         if (withoutUnicodePtr != 0) {
2458           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2459         }
2460       }
2461     } else
2462       currentCharacter = source[currentPosition++];
2463     switch (currentCharacter) {
2464       case '\'' :
2465         currentCharacter = '\'';
2466         break;
2467       case '\\' :
2468         currentCharacter = '\\';
2469         break;
2470       default :
2471         currentCharacter = '\\';
2472         currentPosition--;
2473     }
2474   }
2475
2476   public final void scanDoubleQuotedEscapeCharacter()
2477     throws InvalidInputException {
2478     // the string with "\\u" is a legal string of two chars \ and u
2479     //thus we use a direct access to the source (for regular cases).
2480
2481     if (unicodeAsBackSlash) {
2482       // consume next character
2483       unicodeAsBackSlash = false;
2484       if (((currentCharacter = source[currentPosition++]) == '\\')
2485         && (source[currentPosition] == 'u')) {
2486         getNextUnicodeChar();
2487       } else {
2488         if (withoutUnicodePtr != 0) {
2489           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2490         }
2491       }
2492     } else
2493       currentCharacter = source[currentPosition++];
2494     switch (currentCharacter) {
2495       //      case 'b' :
2496       //        currentCharacter = '\b';
2497       //        break;
2498       case 't' :
2499         currentCharacter = '\t';
2500         break;
2501       case 'n' :
2502         currentCharacter = '\n';
2503         break;
2504         //      case 'f' :
2505         //        currentCharacter = '\f';
2506         //        break;
2507       case 'r' :
2508         currentCharacter = '\r';
2509         break;
2510       case '\"' :
2511         currentCharacter = '\"';
2512         break;
2513       case '\'' :
2514         currentCharacter = '\'';
2515         break;
2516       case '\\' :
2517         currentCharacter = '\\';
2518         break;
2519       case '$' :
2520         currentCharacter = '$';
2521         break;
2522       default :
2523         // -----------octal escape--------------
2524         // OctalDigit
2525         // OctalDigit OctalDigit
2526         // ZeroToThree OctalDigit OctalDigit
2527
2528         int number = Character.getNumericValue(currentCharacter);
2529         if (number >= 0 && number <= 7) {
2530           boolean zeroToThreeNot = number > 3;
2531           if (Character
2532             .isDigit(currentCharacter = source[currentPosition++])) {
2533             int digit = Character.getNumericValue(currentCharacter);
2534             if (digit >= 0 && digit <= 7) {
2535               number = (number * 8) + digit;
2536               if (Character
2537                 .isDigit(currentCharacter = source[currentPosition++])) {
2538                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2539                   currentPosition--;
2540                 } else {
2541                   digit = Character.getNumericValue(currentCharacter);
2542                   if (digit >= 0 && digit <= 7) {
2543                     // has read \ZeroToThree OctalDigit OctalDigit
2544                     number = (number * 8) + digit;
2545                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2546                     currentPosition--;
2547                   }
2548                 }
2549               } else { // has read \OctalDigit NonDigit--> ignore last character
2550                 currentPosition--;
2551               }
2552             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2553               currentPosition--;
2554             }
2555           } else { // has read \OctalDigit --> ignore last character
2556             currentPosition--;
2557           }
2558           if (number > 255)
2559             throw new InvalidInputException(INVALID_ESCAPE);
2560           currentCharacter = (char) number;
2561         }
2562         //else
2563         //     throw new InvalidInputException(INVALID_ESCAPE);
2564     }
2565   }
2566
2567   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2568   //    return scanIdentifierOrKeyword( false );
2569   //  }
2570
2571   public int scanIdentifierOrKeyword(boolean isVariable)
2572     throws InvalidInputException {
2573     //test keywords
2574
2575     //first dispatch on the first char.
2576     //then the length. If there are several
2577     //keywors with the same length AND the same first char, then do another
2578     //disptach on the second char :-)...cool....but fast !
2579
2580     useAssertAsAnIndentifier = false;
2581
2582     while (getNextCharAsJavaIdentifierPart()) {
2583     };
2584
2585     if (isVariable) {
2586       return TokenNameVariable;
2587     }
2588     int index, length;
2589     char[] data;
2590     char firstLetter;
2591     if (withoutUnicodePtr == 0)
2592
2593       //quick test on length == 1 but not on length > 12 while most identifier
2594       //have a length which is <= 12...but there are lots of identifier with
2595       //only one char....
2596
2597       {
2598       if ((length = currentPosition - startPosition) == 1)
2599         return TokenNameIdentifier;
2600       //  data = source;
2601       data = new char[length];
2602       index = startPosition;
2603       for (int i = 0; i < length; i++) {
2604         data[i] = Character.toLowerCase(source[index + i]);
2605       }
2606       index = 0;
2607     } else {
2608       if ((length = withoutUnicodePtr) == 1)
2609         return TokenNameIdentifier;
2610       // data = withoutUnicodeBuffer;
2611       data = new char[withoutUnicodeBuffer.length];
2612       for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2613         data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2614       }
2615       index = 1;
2616     }
2617
2618     firstLetter = data[index];
2619     switch (firstLetter) {
2620
2621       case 'a' : // as and array
2622         switch (length) {
2623           case 2 : //as
2624             if ((data[++index] == 's')) {
2625               return TokenNameas;
2626             } else {
2627               return TokenNameIdentifier;
2628             }
2629           case 3 : //and
2630             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2631               return TokenNameAND;
2632             } else {
2633               return TokenNameIdentifier;
2634             }
2635             //          case 5 :
2636             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2637             //              return TokenNamearray;
2638             //            else
2639             //              return TokenNameIdentifier;
2640           default :
2641             return TokenNameIdentifier;
2642         }
2643       case 'b' : //break
2644         switch (length) {
2645           case 5 :
2646             if ((data[++index] == 'r')
2647               && (data[++index] == 'e')
2648               && (data[++index] == 'a')
2649               && (data[++index] == 'k'))
2650               return TokenNamebreak;
2651             else
2652               return TokenNameIdentifier;
2653           default :
2654             return TokenNameIdentifier;
2655         }
2656
2657       case 'c' : //case class continue
2658         switch (length) {
2659           case 4 :
2660             if ((data[++index] == 'a')
2661               && (data[++index] == 's')
2662               && (data[++index] == 'e'))
2663               return TokenNamecase;
2664             else
2665               return TokenNameIdentifier;
2666           case 5 :
2667             if ((data[++index] == 'l')
2668               && (data[++index] == 'a')
2669               && (data[++index] == 's')
2670               && (data[++index] == 's'))
2671               return TokenNameclass;
2672             else
2673               return TokenNameIdentifier;
2674           case 8 :
2675             if ((data[++index] == 'o')
2676               && (data[++index] == 'n')
2677               && (data[++index] == 't')
2678               && (data[++index] == 'i')
2679               && (data[++index] == 'n')
2680               && (data[++index] == 'u')
2681               && (data[++index] == 'e'))
2682               return TokenNamecontinue;
2683             else
2684               return TokenNameIdentifier;
2685           default :
2686             return TokenNameIdentifier;
2687         }
2688
2689       case 'd' : //define default do
2690         switch (length) {
2691           case 2 :
2692             if ((data[++index] == 'o'))
2693               return TokenNamedo;
2694             else
2695               return TokenNameIdentifier;
2696           case 6 :
2697             if ((data[++index] == 'e')
2698               && (data[++index] == 'f')
2699               && (data[++index] == 'i')
2700               && (data[++index] == 'n')
2701               && (data[++index] == 'e'))
2702               return TokenNamedefine;
2703             else
2704               return TokenNameIdentifier;
2705           case 7 :
2706             if ((data[++index] == 'e')
2707               && (data[++index] == 'f')
2708               && (data[++index] == 'a')
2709               && (data[++index] == 'u')
2710               && (data[++index] == 'l')
2711               && (data[++index] == 't'))
2712               return TokenNamedefault;
2713             else
2714               return TokenNameIdentifier;
2715           default :
2716             return TokenNameIdentifier;
2717         }
2718       case 'e' : //echo else elseif extends
2719         switch (length) {
2720           case 4 :
2721             if ((data[++index] == 'c')
2722               && (data[++index] == 'h')
2723               && (data[++index] == 'o'))
2724               return TokenNameecho;
2725             else if (
2726               (data[index] == 'l')
2727                 && (data[++index] == 's')
2728                 && (data[++index] == 'e'))
2729               return TokenNameelse;
2730             else
2731               return TokenNameIdentifier;
2732           case 5 : // endif
2733             if ((data[++index] == 'n')
2734               && (data[++index] == 'd')
2735               && (data[++index] == 'i')
2736               && (data[++index] == 'f'))
2737               return TokenNameendif;
2738             else
2739               return TokenNameIdentifier;
2740           case 6 : // endfor
2741             if ((data[++index] == 'n')
2742               && (data[++index] == 'd')
2743               && (data[++index] == 'f')
2744               && (data[++index] == 'o')
2745               && (data[++index] == 'r'))
2746               return TokenNameendfor;
2747             else if (
2748               (data[index] == 'l')
2749                 && (data[++index] == 's')
2750                 && (data[++index] == 'e')
2751                 && (data[++index] == 'i')
2752                 && (data[++index] == 'f'))
2753               return TokenNameelseif;
2754             else
2755               return TokenNameIdentifier;
2756           case 7 :
2757             if ((data[++index] == 'x')
2758               && (data[++index] == 't')
2759               && (data[++index] == 'e')
2760               && (data[++index] == 'n')
2761               && (data[++index] == 'd')
2762               && (data[++index] == 's'))
2763               return TokenNameextends;
2764             else
2765               return TokenNameIdentifier;
2766           case 8 : // endwhile
2767             if ((data[++index] == 'n')
2768               && (data[++index] == 'd')
2769               && (data[++index] == 'w')
2770               && (data[++index] == 'h')
2771               && (data[++index] == 'i')
2772               && (data[++index] == 'l')
2773               && (data[++index] == 'e'))
2774               return TokenNameendwhile;
2775             else
2776               return TokenNameIdentifier;
2777           case 9 : // endswitch
2778             if ((data[++index] == 'n')
2779               && (data[++index] == 'd')
2780               && (data[++index] == 's')
2781               && (data[++index] == 'w')
2782               && (data[++index] == 'i')
2783               && (data[++index] == 't')
2784               && (data[++index] == 'c')
2785               && (data[++index] == 'h'))
2786               return TokenNameendswitch;
2787             else
2788               return TokenNameIdentifier;
2789           case 10 : // endforeach
2790             if ((data[++index] == 'n')
2791               && (data[++index] == 'd')
2792               && (data[++index] == 'f')
2793               && (data[++index] == 'o')
2794               && (data[++index] == 'r')
2795               && (data[++index] == 'e')
2796               && (data[++index] == 'a')
2797               && (data[++index] == 'c')
2798               && (data[++index] == 'h'))
2799               return TokenNameendforeach;
2800             else
2801               return TokenNameIdentifier;
2802
2803           default :
2804             return TokenNameIdentifier;
2805         }
2806
2807       case 'f' : //for false function
2808         switch (length) {
2809           case 3 :
2810             if ((data[++index] == 'o') && (data[++index] == 'r'))
2811               return TokenNamefor;
2812             else
2813               return TokenNameIdentifier;
2814           case 5 :
2815             if ((data[++index] == 'a')
2816               && (data[++index] == 'l')
2817               && (data[++index] == 's')
2818               && (data[++index] == 'e'))
2819               return TokenNamefalse;
2820             else
2821               return TokenNameIdentifier;
2822           case 7 : // function
2823             if ((data[++index] == 'o')
2824               && (data[++index] == 'r')
2825               && (data[++index] == 'e')
2826               && (data[++index] == 'a')
2827               && (data[++index] == 'c')
2828               && (data[++index] == 'h'))
2829               return TokenNameforeach;
2830             else
2831               return TokenNameIdentifier;
2832           case 8 : // function
2833             if ((data[++index] == 'u')
2834               && (data[++index] == 'n')
2835               && (data[++index] == 'c')
2836               && (data[++index] == 't')
2837               && (data[++index] == 'i')
2838               && (data[++index] == 'o')
2839               && (data[++index] == 'n'))
2840               return TokenNamefunction;
2841             else
2842               return TokenNameIdentifier;
2843           default :
2844             return TokenNameIdentifier;
2845         }
2846       case 'g' : //global
2847         if (length == 6) {
2848           if ((data[++index] == 'l')
2849             && (data[++index] == 'o')
2850             && (data[++index] == 'b')
2851             && (data[++index] == 'a')
2852             && (data[++index] == 'l')) {
2853             return TokenNameglobal;
2854           }
2855         }
2856         return TokenNameIdentifier;
2857
2858       case 'i' : //if int
2859         switch (length) {
2860           case 2 :
2861             if (data[++index] == 'f')
2862               return TokenNameif;
2863             else
2864               return TokenNameIdentifier;
2865             //          case 3 :
2866             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2867             //              return TokenNameint;
2868             //            else
2869             //              return TokenNameIdentifier;
2870           case 7 :
2871             if ((data[++index] == 'n')
2872               && (data[++index] == 'c')
2873               && (data[++index] == 'l')
2874               && (data[++index] == 'u')
2875               && (data[++index] == 'd')
2876               && (data[++index] == 'e'))
2877               return TokenNameinclude;
2878             else
2879               return TokenNameIdentifier;
2880           case 12 :
2881             if ((data[++index] == 'n')
2882               && (data[++index] == 'c')
2883               && (data[++index] == 'l')
2884               && (data[++index] == 'u')
2885               && (data[++index] == 'd')
2886               && (data[++index] == 'e')
2887               && (data[++index] == '_')
2888               && (data[++index] == 'o')
2889               && (data[++index] == 'n')
2890               && (data[++index] == 'c')
2891               && (data[++index] == 'e'))
2892               return TokenNameinclude_once;
2893             else
2894               return TokenNameIdentifier;
2895           default :
2896             return TokenNameIdentifier;
2897         }
2898
2899       case 'l' : //list
2900         if (length == 4) {
2901           if ((data[++index] == 'i')
2902             && (data[++index] == 's')
2903             && (data[++index] == 't')) {
2904             return TokenNamelist;
2905           }
2906         }
2907         return TokenNameIdentifier;
2908
2909       case 'n' : // new null
2910         switch (length) {
2911           case 3 :
2912             if ((data[++index] == 'e') && (data[++index] == 'w'))
2913               return TokenNamenew;
2914             else
2915               return TokenNameIdentifier;
2916           case 4 :
2917             if ((data[++index] == 'u')
2918               && (data[++index] == 'l')
2919               && (data[++index] == 'l'))
2920               return TokenNamenull;
2921             else
2922               return TokenNameIdentifier;
2923
2924           default :
2925             return TokenNameIdentifier;
2926         }
2927       case 'o' : // or old_function
2928         if (length == 2) {
2929           if (data[++index] == 'r') {
2930             return TokenNameOR;
2931           }
2932         }
2933         //        if (length == 12) {
2934         //          if ((data[++index] == 'l')
2935         //            && (data[++index] == 'd')
2936         //            && (data[++index] == '_')
2937         //            && (data[++index] == 'f')
2938         //            && (data[++index] == 'u')
2939         //            && (data[++index] == 'n')
2940         //            && (data[++index] == 'c')
2941         //            && (data[++index] == 't')
2942         //            && (data[++index] == 'i')
2943         //            && (data[++index] == 'o')
2944         //            && (data[++index] == 'n')) {
2945         //            return TokenNameold_function;
2946         //          }
2947         //        }
2948         return TokenNameIdentifier;
2949
2950       case 'p' : // print
2951         if (length == 5) {
2952           if ((data[++index] == 'r')
2953             && (data[++index] == 'i')
2954             && (data[++index] == 'n')
2955             && (data[++index] == 't')) {
2956             return TokenNameprint;
2957           }
2958         }
2959         return TokenNameIdentifier;
2960       case 'r' : //return require require_once
2961         if (length == 6) {
2962           if ((data[++index] == 'e')
2963             && (data[++index] == 't')
2964             && (data[++index] == 'u')
2965             && (data[++index] == 'r')
2966             && (data[++index] == 'n')) {
2967             return TokenNamereturn;
2968           }
2969         } else if (length == 7) {
2970           if ((data[++index] == 'e')
2971             && (data[++index] == 'q')
2972             && (data[++index] == 'u')
2973             && (data[++index] == 'i')
2974             && (data[++index] == 'r')
2975             && (data[++index] == 'e')) {
2976             return TokenNamerequire;
2977           }
2978         } else if (length == 12) {
2979           if ((data[++index] == 'e')
2980             && (data[++index] == 'q')
2981             && (data[++index] == 'u')
2982             && (data[++index] == 'i')
2983             && (data[++index] == 'r')
2984             && (data[++index] == 'e')
2985             && (data[++index] == '_')
2986             && (data[++index] == 'o')
2987             && (data[++index] == 'n')
2988             && (data[++index] == 'c')
2989             && (data[++index] == 'e')) {
2990             return TokenNamerequire_once;
2991           }
2992         } else
2993           return TokenNameIdentifier;
2994
2995       case 's' : //static switch
2996         switch (length) {
2997           case 6 :
2998             if (data[++index] == 't')
2999               if ((data[++index] == 'a')
3000                 && (data[++index] == 't')
3001                 && (data[++index] == 'i')
3002                 && (data[++index] == 'c')) {
3003                 return TokenNamestatic;
3004               } else
3005                 return TokenNameIdentifier;
3006             else if (
3007               (data[index] == 'w')
3008                 && (data[++index] == 'i')
3009                 && (data[++index] == 't')
3010                 && (data[++index] == 'c')
3011                 && (data[++index] == 'h'))
3012               return TokenNameswitch;
3013             else
3014               return TokenNameIdentifier;
3015           default :
3016             return TokenNameIdentifier;
3017         }
3018
3019       case 't' : // true
3020         switch (length) {
3021
3022           case 4 :
3023             if ((data[++index] == 'r')
3024               && (data[++index] == 'u')
3025               && (data[++index] == 'e'))
3026               return TokenNametrue;
3027             else
3028               return TokenNameIdentifier;
3029             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3030             //              return TokenNamethis;
3031
3032           default :
3033             return TokenNameIdentifier;
3034         }
3035
3036       case 'v' : //var
3037         switch (length) {
3038           case 3 :
3039             if ((data[++index] == 'a') && (data[++index] == 'r'))
3040               return TokenNamevar;
3041             else
3042               return TokenNameIdentifier;
3043
3044           default :
3045             return TokenNameIdentifier;
3046         }
3047
3048       case 'w' : //while
3049         switch (length) {
3050           case 5 :
3051             if ((data[++index] == 'h')
3052               && (data[++index] == 'i')
3053               && (data[++index] == 'l')
3054               && (data[++index] == 'e'))
3055               return TokenNamewhile;
3056             else
3057               return TokenNameIdentifier;
3058             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3059             //return TokenNamewidefp ;
3060             //else
3061             //return TokenNameIdentifier;
3062           default :
3063             return TokenNameIdentifier;
3064         }
3065
3066       case 'x' : //xor
3067         switch (length) {
3068           case 3 :
3069             if ((data[++index] == 'o') && (data[++index] == 'r'))
3070               return TokenNameXOR;
3071             else
3072               return TokenNameIdentifier;
3073
3074           default :
3075             return TokenNameIdentifier;
3076         }
3077       default :
3078         return TokenNameIdentifier;
3079     }
3080   }
3081   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3082
3083     //when entering this method the currentCharacter is the firt
3084     //digit of the number , i.e. it may be preceeded by a . when
3085     //dotPrefix is true
3086
3087     boolean floating = dotPrefix;
3088     if ((!dotPrefix) && (currentCharacter == '0')) {
3089       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3090         //force the first char of the hexa number do exist...
3091         // consume next character
3092         unicodeAsBackSlash = false;
3093         if (((currentCharacter = source[currentPosition++]) == '\\')
3094           && (source[currentPosition] == 'u')) {
3095           getNextUnicodeChar();
3096         } else {
3097           if (withoutUnicodePtr != 0) {
3098             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3099           }
3100         }
3101         if (Character.digit(currentCharacter, 16) == -1)
3102           throw new InvalidInputException(INVALID_HEXA);
3103         //---end forcing--
3104         while (getNextCharAsDigit(16)) {
3105         };
3106         //        if (getNextChar('l', 'L') >= 0)
3107         //          return TokenNameLongLiteral;
3108         //        else
3109         return TokenNameIntegerLiteral;
3110       }
3111
3112       //there is x or X in the number
3113       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3114       if (getNextCharAsDigit()) {
3115         //-------------potential octal-----------------
3116         while (getNextCharAsDigit()) {
3117         };
3118
3119         //        if (getNextChar('l', 'L') >= 0) {
3120         //          return TokenNameLongLiteral;
3121         //        }
3122         //
3123         //        if (getNextChar('f', 'F') >= 0) {
3124         //          return TokenNameFloatingPointLiteral;
3125         //        }
3126
3127         if (getNextChar('d', 'D') >= 0) {
3128           return TokenNameDoubleLiteral;
3129         } else { //make the distinction between octal and float ....
3130           if (getNextChar('.')) { //bingo ! ....
3131             while (getNextCharAsDigit()) {
3132             };
3133             if (getNextChar('e', 'E') >= 0) {
3134               // consume next character
3135               unicodeAsBackSlash = false;
3136               if (((currentCharacter = source[currentPosition++]) == '\\')
3137                 && (source[currentPosition] == 'u')) {
3138                 getNextUnicodeChar();
3139               } else {
3140                 if (withoutUnicodePtr != 0) {
3141                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3142                 }
3143               }
3144
3145               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3146                 // consume next character
3147                 unicodeAsBackSlash = false;
3148                 if (((currentCharacter = source[currentPosition++]) == '\\')
3149                   && (source[currentPosition] == 'u')) {
3150                   getNextUnicodeChar();
3151                 } else {
3152                   if (withoutUnicodePtr != 0) {
3153                     withoutUnicodeBuffer[++withoutUnicodePtr] =
3154                       currentCharacter;
3155                   }
3156                 }
3157               }
3158               if (!Character.isDigit(currentCharacter))
3159                 throw new InvalidInputException(INVALID_FLOAT);
3160               while (getNextCharAsDigit()) {
3161               };
3162             }
3163             //            if (getNextChar('f', 'F') >= 0)
3164             //              return TokenNameFloatingPointLiteral;
3165             getNextChar('d', 'D'); //jump over potential d or D
3166             return TokenNameDoubleLiteral;
3167           } else {
3168             return TokenNameIntegerLiteral;
3169           }
3170         }
3171       } else {
3172         /* carry on */
3173       }
3174     }
3175
3176     while (getNextCharAsDigit()) {
3177     };
3178
3179     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3180     //      return TokenNameLongLiteral;
3181
3182     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3183       while (getNextCharAsDigit()) {
3184       };
3185       floating = true;
3186     }
3187
3188     //if floating is true both exponant and suffix may be optional
3189
3190     if (getNextChar('e', 'E') >= 0) {
3191       floating = true;
3192       // consume next character
3193       unicodeAsBackSlash = false;
3194       if (((currentCharacter = source[currentPosition++]) == '\\')
3195         && (source[currentPosition] == 'u')) {
3196         getNextUnicodeChar();
3197       } else {
3198         if (withoutUnicodePtr != 0) {
3199           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3200         }
3201       }
3202
3203       if ((currentCharacter == '-')
3204         || (currentCharacter == '+')) { // consume next character
3205         unicodeAsBackSlash = false;
3206         if (((currentCharacter = source[currentPosition++]) == '\\')
3207           && (source[currentPosition] == 'u')) {
3208           getNextUnicodeChar();
3209         } else {
3210           if (withoutUnicodePtr != 0) {
3211             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3212           }
3213         }
3214       }
3215       if (!Character.isDigit(currentCharacter))
3216         throw new InvalidInputException(INVALID_FLOAT);
3217       while (getNextCharAsDigit()) {
3218       };
3219     }
3220
3221     if (getNextChar('d', 'D') >= 0)
3222       return TokenNameDoubleLiteral;
3223     //    if (getNextChar('f', 'F') >= 0)
3224     //      return TokenNameFloatingPointLiteral;
3225
3226     //the long flag has been tested before
3227
3228     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3229   }
3230   /**
3231    * Search the line number corresponding to a specific position
3232    *
3233    */
3234   public final int getLineNumber(int position) {
3235
3236     if (lineEnds == null)
3237       return 1;
3238     int length = linePtr + 1;
3239     if (length == 0)
3240       return 1;
3241     int g = 0, d = length - 1;
3242     int m = 0;
3243     while (g <= d) {
3244       m = (g + d) / 2;
3245       if (position < lineEnds[m]) {
3246         d = m - 1;
3247       } else if (position > lineEnds[m]) {
3248         g = m + 1;
3249       } else {
3250         return m + 1;
3251       }
3252     }
3253     if (position < lineEnds[m]) {
3254       return m + 1;
3255     }
3256     return m + 2;
3257   }
3258
3259   public void setPHPMode(boolean mode) {
3260     phpMode = mode;
3261   }
3262
3263   public final void setSource(char[] source) {
3264     //the source-buffer is set to sourceString
3265
3266     if (source == null) {
3267       this.source = new char[0];
3268     } else {
3269       this.source = source;
3270     }
3271     startPosition = -1;
3272     initialPosition = currentPosition = 0;
3273     containsAssertKeyword = false;
3274     withoutUnicodeBuffer = new char[this.source.length];
3275
3276   }
3277
3278   public String toString() {
3279     if (startPosition == source.length)
3280       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3281     if (currentPosition > source.length)
3282       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3283
3284     char front[] = new char[startPosition];
3285     System.arraycopy(source, 0, front, 0, startPosition);
3286
3287     int middleLength = (currentPosition - 1) - startPosition + 1;
3288     char middle[];
3289     if (middleLength > -1) {
3290       middle = new char[middleLength];
3291       System.arraycopy(source, startPosition, middle, 0, middleLength);
3292     } else {
3293       middle = new char[0];
3294     }
3295
3296     char end[] = new char[source.length - (currentPosition - 1)];
3297     System.arraycopy(
3298       source,
3299       (currentPosition - 1) + 1,
3300       end,
3301       0,
3302       source.length - (currentPosition - 1) - 1);
3303
3304     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3305     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3306     + new String(end);
3307   }
3308   public final String toStringAction(int act) {
3309     switch (act) {
3310       case TokenNameERROR :
3311         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3312       case TokenNameStopPHP :
3313         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3314       case TokenNameIdentifier :
3315         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3316       case TokenNameVariable :
3317         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3318       case TokenNameas :
3319         return "as"; //$NON-NLS-1$
3320       case TokenNamebreak :
3321         return "break"; //$NON-NLS-1$
3322       case TokenNamecase :
3323         return "case"; //$NON-NLS-1$
3324       case TokenNameclass :
3325         return "class"; //$NON-NLS-1$
3326       case TokenNamecontinue :
3327         return "continue"; //$NON-NLS-1$
3328       case TokenNamedefault :
3329         return "default"; //$NON-NLS-1$
3330       case TokenNamedefine :
3331         return "define"; //$NON-NLS-1$
3332       case TokenNamedo :
3333         return "do"; //$NON-NLS-1$
3334       case TokenNameecho :
3335         return "echo"; //$NON-NLS-1$
3336       case TokenNameelse :
3337         return "else"; //$NON-NLS-1$
3338       case TokenNameelseif :
3339         return "elseif"; //$NON-NLS-1$
3340       case TokenNameendfor :
3341         return "endfor"; //$NON-NLS-1$
3342       case TokenNameendforeach :
3343         return "endforeach"; //$NON-NLS-1$
3344       case TokenNameendif :
3345         return "endif"; //$NON-NLS-1$
3346       case TokenNameendswitch :
3347         return "endswitch"; //$NON-NLS-1$
3348       case TokenNameendwhile :
3349         return "endwhile"; //$NON-NLS-1$
3350       case TokenNameextends :
3351         return "extends"; //$NON-NLS-1$
3352       case TokenNamefalse :
3353         return "false"; //$NON-NLS-1$
3354       case TokenNamefor :
3355         return "for"; //$NON-NLS-1$
3356       case TokenNameforeach :
3357         return "foreach"; //$NON-NLS-1$
3358       case TokenNamefunction :
3359         return "function"; //$NON-NLS-1$
3360       case TokenNameglobal :
3361         return "global"; //$NON-NLS-1$
3362       case TokenNameif :
3363         return "if"; //$NON-NLS-1$
3364       case TokenNameinclude :
3365         return "include"; //$NON-NLS-1$
3366       case TokenNameinclude_once :
3367         return "include_once"; //$NON-NLS-1$
3368       case TokenNamelist :
3369         return "list"; //$NON-NLS-1$
3370       case TokenNamenew :
3371         return "new"; //$NON-NLS-1$
3372       case TokenNamenull :
3373         return "null"; //$NON-NLS-1$
3374       case TokenNameprint :
3375         return "print"; //$NON-NLS-1$
3376       case TokenNamerequire :
3377         return "require"; //$NON-NLS-1$
3378       case TokenNamerequire_once :
3379         return "require_once"; //$NON-NLS-1$
3380       case TokenNamereturn :
3381         return "return"; //$NON-NLS-1$
3382       case TokenNamestatic :
3383         return "static"; //$NON-NLS-1$
3384       case TokenNameswitch :
3385         return "switch"; //$NON-NLS-1$
3386       case TokenNametrue :
3387         return "true"; //$NON-NLS-1$
3388       case TokenNamevar :
3389         return "var"; //$NON-NLS-1$
3390       case TokenNamewhile :
3391         return "while"; //$NON-NLS-1$
3392       case TokenNameIntegerLiteral :
3393         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3394       case TokenNameDoubleLiteral :
3395         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3396       case TokenNameStringLiteral :
3397         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3398       case TokenNameStringConstant :
3399         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3400       case TokenNameStringInterpolated :
3401         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3402       case TokenNameHEREDOC :
3403         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3404
3405       case TokenNamePLUS_PLUS :
3406         return "++"; //$NON-NLS-1$
3407       case TokenNameMINUS_MINUS :
3408         return "--"; //$NON-NLS-1$
3409       case TokenNameEQUAL_EQUAL :
3410         return "=="; //$NON-NLS-1$
3411       case TokenNameEQUAL_GREATER :
3412         return "=>"; //$NON-NLS-1$
3413       case TokenNameLESS_EQUAL :
3414         return "<="; //$NON-NLS-1$
3415       case TokenNameGREATER_EQUAL :
3416         return ">="; //$NON-NLS-1$
3417       case TokenNameNOT_EQUAL :
3418         return "!="; //$NON-NLS-1$
3419       case TokenNameLEFT_SHIFT :
3420         return "<<"; //$NON-NLS-1$
3421       case TokenNameRIGHT_SHIFT :
3422         return ">>"; //$NON-NLS-1$
3423       case TokenNamePLUS_EQUAL :
3424         return "+="; //$NON-NLS-1$
3425       case TokenNameMINUS_EQUAL :
3426         return "-="; //$NON-NLS-1$
3427       case TokenNameMULTIPLY_EQUAL :
3428         return "*="; //$NON-NLS-1$
3429       case TokenNameDIVIDE_EQUAL :
3430         return "/="; //$NON-NLS-1$
3431       case TokenNameAND_EQUAL :
3432         return "&="; //$NON-NLS-1$
3433       case TokenNameOR_EQUAL :
3434         return "|="; //$NON-NLS-1$
3435       case TokenNameXOR_EQUAL :
3436         return "^="; //$NON-NLS-1$
3437       case TokenNameREMAINDER_EQUAL :
3438         return "%="; //$NON-NLS-1$
3439       case TokenNameLEFT_SHIFT_EQUAL :
3440         return "<<="; //$NON-NLS-1$
3441       case TokenNameRIGHT_SHIFT_EQUAL :
3442         return ">>="; //$NON-NLS-1$
3443       case TokenNameOR_OR :
3444         return "||"; //$NON-NLS-1$
3445       case TokenNameAND_AND :
3446         return "&&"; //$NON-NLS-1$
3447       case TokenNamePLUS :
3448         return "+"; //$NON-NLS-1$
3449       case TokenNameMINUS :
3450         return "-"; //$NON-NLS-1$
3451       case TokenNameMINUS_GREATER :
3452         return "->";
3453       case TokenNameNOT :
3454         return "!"; //$NON-NLS-1$
3455       case TokenNameREMAINDER :
3456         return "%"; //$NON-NLS-1$
3457       case TokenNameXOR :
3458         return "^"; //$NON-NLS-1$
3459       case TokenNameAND :
3460         return "&"; //$NON-NLS-1$
3461       case TokenNameMULTIPLY :
3462         return "*"; //$NON-NLS-1$
3463       case TokenNameOR :
3464         return "|"; //$NON-NLS-1$
3465       case TokenNameTWIDDLE :
3466         return "~"; //$NON-NLS-1$
3467       case TokenNameTWIDDLE_EQUAL :
3468         return "~="; //$NON-NLS-1$
3469       case TokenNameDIVIDE :
3470         return "/"; //$NON-NLS-1$
3471       case TokenNameGREATER :
3472         return ">"; //$NON-NLS-1$
3473       case TokenNameLESS :
3474         return "<"; //$NON-NLS-1$
3475       case TokenNameLPAREN :
3476         return "("; //$NON-NLS-1$
3477       case TokenNameRPAREN :
3478         return ")"; //$NON-NLS-1$
3479       case TokenNameLBRACE :
3480         return "{"; //$NON-NLS-1$
3481       case TokenNameRBRACE :
3482         return "}"; //$NON-NLS-1$
3483       case TokenNameLBRACKET :
3484         return "["; //$NON-NLS-1$
3485       case TokenNameRBRACKET :
3486         return "]"; //$NON-NLS-1$
3487       case TokenNameSEMICOLON :
3488         return ";"; //$NON-NLS-1$
3489       case TokenNameQUESTION :
3490         return "?"; //$NON-NLS-1$
3491       case TokenNameCOLON :
3492         return ":"; //$NON-NLS-1$
3493       case TokenNameCOMMA :
3494         return ","; //$NON-NLS-1$
3495       case TokenNameDOT :
3496         return "."; //$NON-NLS-1$
3497       case TokenNameEQUAL :
3498         return "="; //$NON-NLS-1$
3499       case TokenNameAT :
3500         return "@";
3501       case TokenNameDOLLAR_LBRACE :
3502         return "${";
3503       case TokenNameEOF :
3504         return "EOF"; //$NON-NLS-1$
3505       default :
3506         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3507     }
3508   }
3509
3510   public Scanner(
3511     boolean tokenizeComments,
3512     boolean tokenizeWhiteSpace,
3513     boolean checkNonExternalizedStringLiterals) {
3514     this(
3515       tokenizeComments,
3516       tokenizeWhiteSpace,
3517       checkNonExternalizedStringLiterals,
3518       false);
3519   }
3520
3521   public Scanner(
3522     boolean tokenizeComments,
3523     boolean tokenizeWhiteSpace,
3524     boolean checkNonExternalizedStringLiterals,
3525     boolean assertMode) {
3526     this.eofPosition = Integer.MAX_VALUE;
3527     this.tokenizeComments = tokenizeComments;
3528     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3529     this.checkNonExternalizedStringLiterals =
3530       checkNonExternalizedStringLiterals;
3531     this.assertMode = assertMode;
3532   }
3533
3534   private void checkNonExternalizeString() throws InvalidInputException {
3535     if (currentLine == null)
3536       return;
3537     parseTags(currentLine);
3538   }
3539
3540   private void parseTags(NLSLine line) throws InvalidInputException {
3541     String s = new String(getCurrentTokenSource());
3542     int pos = s.indexOf(TAG_PREFIX);
3543     int lineLength = line.size();
3544     while (pos != -1) {
3545       int start = pos + TAG_PREFIX_LENGTH;
3546       int end = s.indexOf(TAG_POSTFIX, start);
3547       String index = s.substring(start, end);
3548       int i = 0;
3549       try {
3550         i = Integer.parseInt(index) - 1;
3551         // Tags are one based not zero based.
3552       } catch (NumberFormatException e) {
3553         i = -1; // we don't want to consider this as a valid NLS tag
3554       }
3555       if (line.exists(i)) {
3556         line.set(i, null);
3557       }
3558       pos = s.indexOf(TAG_PREFIX, start);
3559     }
3560
3561     this.nonNLSStrings = new StringLiteral[lineLength];
3562     int nonNLSCounter = 0;
3563     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3564       StringLiteral literal = (StringLiteral) iterator.next();
3565       if (literal != null) {
3566         this.nonNLSStrings[nonNLSCounter++] = literal;
3567       }
3568     }
3569     if (nonNLSCounter == 0) {
3570       this.nonNLSStrings = null;
3571       currentLine = null;
3572       return;
3573     }
3574     this.wasNonExternalizedStringLiteral = true;
3575     if (nonNLSCounter != lineLength) {
3576       System.arraycopy(
3577         this.nonNLSStrings,
3578         0,
3579         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3580         0,
3581         nonNLSCounter);
3582     }
3583     currentLine = null;
3584   }
3585 }