net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v0.5
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v05.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  ******************************************************************************/
  11 package net.sourceforge.phpdt.internal.compiler.parser;
  12
  13 import java.util.ArrayList;
  14 import java.util.Iterator;
  15 import java.util.List;
  16
  17 import net.sourceforge.phpdt.core.compiler.*;
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
  19
  20 public class Scanner implements IScanner, ITerminalSymbols {
  21
  22   /* APIs are
  23    - getNextToken() which returns the current type of the token
  24      (this value is not memorized by the scanner)
  25    - getCurrentTokenSource() which provides with the token "REAL" source
  26      (aka all unicode have been transformed into a correct char)
  27    - sourceStart gives the position into the stream
  28    - currentPosition-1 gives the sourceEnd position into the stream
  29   */
  30
  31   // 1.4 feature
  32   private boolean assertMode;
  33   public boolean useAssertAsAnIndentifier = false;
  34   //flag indicating if processed source contains occurrences of keyword assert
  35   public boolean containsAssertKeyword = false;
  36
  37   public boolean recordLineSeparator;
  38   public boolean phpMode = false;
  39
  40   public char currentCharacter;
  41   public int startPosition;
  42   public int currentPosition;
  43   public int initialPosition, eofPosition;
  44   // after this position eof are generated instead of real token from the source
  45
  46   public boolean tokenizeComments;
  47   public boolean tokenizeWhiteSpace;
  48
  49   //source should be viewed as a window (aka a part)
  50   //of an entire very large stream
  51   public char source[];
  52
  53   //unicode support
  54   public char[] withoutUnicodeBuffer;
  55   public int withoutUnicodePtr;
  56   //when == 0 ==> no unicode in the current token
  57   public boolean unicodeAsBackSlash = false;
  58
  59   public boolean scanningFloatLiteral = false;
  60
  61   //support for /** comments
  62   //public char[][] comments = new char[10][];
  63   public int[] commentStops = new int[10];
  64   public int[] commentStarts = new int[10];
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
  66
  67   //diet parsing support - jump over some method body when requested
  68   public boolean diet = false;
  69
  70   //support for the  poor-line-debuggers ....
  71   //remember the position of the cr/lf
  72   public int[] lineEnds = new int[250];
  73   public int linePtr = -1;
  74   public boolean wasAcr = false;
  75
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  77
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
  85
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
  90
  91   //----------------optimized identifier management------------------
  92   static final char[] charArray_a = new char[] { 'a' },
  93     charArray_b = new char[] { 'b' },
  94     charArray_c = new char[] { 'c' },
  95     charArray_d = new char[] { 'd' },
  96     charArray_e = new char[] { 'e' },
  97     charArray_f = new char[] { 'f' },
  98     charArray_g = new char[] { 'g' },
  99     charArray_h = new char[] { 'h' },
 100     charArray_i = new char[] { 'i' },
 101     charArray_j = new char[] { 'j' },
 102     charArray_k = new char[] { 'k' },
 103     charArray_l = new char[] { 'l' },
 104     charArray_m = new char[] { 'm' },
 105     charArray_n = new char[] { 'n' },
 106     charArray_o = new char[] { 'o' },
 107     charArray_p = new char[] { 'p' },
 108     charArray_q = new char[] { 'q' },
 109     charArray_r = new char[] { 'r' },
 110     charArray_s = new char[] { 's' },
 111     charArray_t = new char[] { 't' },
 112     charArray_u = new char[] { 'u' },
 113     charArray_v = new char[] { 'v' },
 114     charArray_w = new char[] { 'w' },
 115     charArray_x = new char[] { 'x' },
 116     charArray_y = new char[] { 'y' },
 117     charArray_z = new char[] { 'z' };
 118
 119   static final char[] initCharArray =
 120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 121   static final int TableSize = 30, InternalTableSize = 6;
 122   //30*6 = 180 entries
 123   public static final int OptimizedLength = 6;
 124   public /*static*/
 125   final char[][][][] charArray_length =
 126     new char[OptimizedLength][TableSize][InternalTableSize][];
 127   // support for detecting non-externalized string literals
 128   int currentLineNr = -1;
 129   int previousLineNr = -1;
 130   NLSLine currentLine = null;
 131   List lines = new ArrayList();
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 136   public StringLiteral[] nonNLSStrings = null;
 137   public boolean checkNonExternalizedStringLiterals = true;
 138   public boolean wasNonExternalizedStringLiteral = false;
 139
 140   /*static*/ {
 141     for (int i = 0; i < 6; i++) {
 142       for (int j = 0; j < TableSize; j++) {
 143         for (int k = 0; k < InternalTableSize; k++) {
 144           charArray_length[i][j][k] = initCharArray;
 145         }
 146       }
 147     }
 148   }
 149   static int newEntry2 = 0,
 150     newEntry3 = 0,
 151     newEntry4 = 0,
 152     newEntry5 = 0,
 153     newEntry6 = 0;
 154
 155   public static final int RoundBracket = 0;
 156   public static final int SquareBracket = 1;
 157   public static final int CurlyBracket = 2;
 158   public static final int BracketKinds = 3;
 159
 160   public static final boolean DEBUG = false;
 161
 162   public Scanner() {
 163     this(false, false);
 164   }
 165   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 166     this(tokenizeComments, tokenizeWhiteSpace, false);
 167   }
 168
 169   /**
 170    * Determines if the specified character is
 171    * permissible as the first character in a PHP identifier
 172    */
 173   public static boolean isPHPIdentifierStart(char ch) {
 174     return Character.isLetter(ch)
 175       || (ch == '_')
 176       || (0x7F <= ch && ch <= 0xFF);
 177   }
 178
 179   /**
 180    * Determines if the specified character may be part of a PHP identifier as
 181    * other than the first character
 182    */
 183   public static boolean isPHPIdentifierPart(char ch) {
 184     return Character.isLetterOrDigit(ch)
 185       || (ch == '_')
 186       || (0x7F <= ch && ch <= 0xFF);
 187   }
 188
 189   public final boolean atEnd() {
 190     // This code is not relevant if source is
 191     // Only a part of the real stream input
 192
 193     return source.length == currentPosition;
 194   }
 195   public char[] getCurrentIdentifierSource() {
 196     //return the token REAL source (aka unicodes are precomputed)
 197
 198     char[] result;
 199 //    if (withoutUnicodePtr != 0)
 200 //      //0 is used as a fast test flag so the real first char is in position 1
 201 //      System.arraycopy(
 202 //        withoutUnicodeBuffer,
 203 //        1,
 204 //        result = new char[withoutUnicodePtr],
 205 //        0,
 206 //        withoutUnicodePtr);
 207 //    else {
 208       int length = currentPosition - startPosition;
 209       switch (length) { // see OptimizedLength
 210         case 1 :
 211           return optimizedCurrentTokenSource1();
 212         case 2 :
 213           return optimizedCurrentTokenSource2();
 214         case 3 :
 215           return optimizedCurrentTokenSource3();
 216         case 4 :
 217           return optimizedCurrentTokenSource4();
 218         case 5 :
 219           return optimizedCurrentTokenSource5();
 220         case 6 :
 221           return optimizedCurrentTokenSource6();
 222       }
 223       //no optimization
 224       System.arraycopy(
 225         source,
 226         startPosition,
 227         result = new char[length],
 228         0,
 229         length);
 230  //   }
 231     return result;
 232   }
 233   public int getCurrentTokenEndPosition() {
 234     return this.currentPosition - 1;
 235   }
 236   public final char[] getCurrentTokenSource() {
 237     // Return the token REAL source (aka unicodes are precomputed)
 238
 239     char[] result;
 240 //    if (withoutUnicodePtr != 0)
 241 //      // 0 is used as a fast test flag so the real first char is in position 1
 242 //      System.arraycopy(
 243 //        withoutUnicodeBuffer,
 244 //        1,
 245 //        result = new char[withoutUnicodePtr],
 246 //        0,
 247 //        withoutUnicodePtr);
 248 //    else {
 249       int length;
 250       System.arraycopy(
 251         source,
 252         startPosition,
 253         result = new char[length = currentPosition - startPosition],
 254         0,
 255         length);
 256 //    }
 257     return result;
 258   }
 259
 260   public final char[] getCurrentTokenSource(int startPos) {
 261     // Return the token REAL source (aka unicodes are precomputed)
 262
 263     char[] result;
 264 //    if (withoutUnicodePtr != 0)
 265 //      // 0 is used as a fast test flag so the real first char is in position 1
 266 //      System.arraycopy(
 267 //        withoutUnicodeBuffer,
 268 //        1,
 269 //        result = new char[withoutUnicodePtr],
 270 //        0,
 271 //        withoutUnicodePtr);
 272 //    else {
 273       int length;
 274       System.arraycopy(
 275         source,
 276         startPos,
 277         result = new char[length = currentPosition - startPos],
 278         0,
 279         length);
 280   //  }
 281     return result;
 282   }
 283
 284   public final char[] getCurrentTokenSourceString() {
 285     //return the token REAL source (aka unicodes are precomputed).
 286     //REMOVE the two " that are at the beginning and the end.
 287
 288     char[] result;
 289     if (withoutUnicodePtr != 0)
 290       //0 is used as a fast test flag so the real first char is in position 1
 291       System.arraycopy(withoutUnicodeBuffer, 2,
 292       //2 is 1 (real start) + 1 (to jump over the ")
 293       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 294     else {
 295       int length;
 296       System.arraycopy(
 297         source,
 298         startPosition + 1,
 299         result = new char[length = currentPosition - startPosition - 2],
 300         0,
 301         length);
 302     }
 303     return result;
 304   }
 305   public int getCurrentTokenStartPosition() {
 306     return this.startPosition;
 307   }
 308   /*
 309    * Search the source position corresponding to the end of a given line number
 310    *
 311    * Line numbers are 1-based, and relative to the scanner initialPosition.
 312    * Character positions are 0-based.
 313    *
 314    * In case the given line number is inconsistent, answers -1.
 315    */
 316   public final int getLineEnd(int lineNumber) {
 317
 318     if (lineEnds == null)
 319       return -1;
 320     if (lineNumber >= lineEnds.length)
 321       return -1;
 322     if (lineNumber <= 0)
 323       return -1;
 324
 325     if (lineNumber == lineEnds.length - 1)
 326       return eofPosition;
 327     return lineEnds[lineNumber - 1];
 328     // next line start one character behind the lineEnd of the previous line
 329   }
 330   /**
 331    * Search the source position corresponding to the beginning of a given line number
 332    *
 333    * Line numbers are 1-based, and relative to the scanner initialPosition.
 334    * Character positions are 0-based.
 335    *
 336    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 337    *
 338    * In case the given line number is inconsistent, answers -1.
 339    */
 340   public final int getLineStart(int lineNumber) {
 341
 342     if (lineEnds == null)
 343       return -1;
 344     if (lineNumber >= lineEnds.length)
 345       return -1;
 346     if (lineNumber <= 0)
 347       return -1;
 348
 349     if (lineNumber == 1)
 350       return initialPosition;
 351     return lineEnds[lineNumber - 2] + 1;
 352     // next line start one character behind the lineEnd of the previous line
 353   }
 354   public final boolean getNextChar(char testedChar) {
 355     //BOOLEAN
 356     //handle the case of unicode.
 357     //when a unicode appears then we must use a buffer that holds char internal values
 358     //At the end of this method currentCharacter holds the new visited char
 359     //and currentPosition points right next after it
 360     //Both previous lines are true if the currentCharacter is == to the testedChar
 361     //On false, no side effect has occured.
 362
 363     //ALL getNextChar.... ARE OPTIMIZED COPIES
 364
 365     int temp = currentPosition;
 366     try {
 367       currentCharacter = source[currentPosition++];
 368 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 369 //        && (source[currentPosition] == 'u')) {
 370 //        //-------------unicode traitement ------------
 371 //        int c1, c2, c3, c4;
 372 //        int unicodeSize = 6;
 373 //        currentPosition++;
 374 //        while (source[currentPosition] == 'u') {
 375 //          currentPosition++;
 376 //          unicodeSize++;
 377 //        }
 378 //
 379 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 380 //          || c1 < 0)
 381 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 382 //            || c2 < 0)
 383 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 384 //            || c3 < 0)
 385 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 386 //            || c4 < 0)) {
 387 //          currentPosition = temp;
 388 //          return false;
 389 //        }
 390 //
 391 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 392 //        if (currentCharacter != testedChar) {
 393 //          currentPosition = temp;
 394 //          return false;
 395 //        }
 396 //        unicodeAsBackSlash = currentCharacter == '\\';
 397 //
 398 //        //need the unicode buffer
 399 //        if (withoutUnicodePtr == 0) {
 400 //          //buffer all the entries that have been left aside....
 401 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 402 //          System.arraycopy(
 403 //            source,
 404 //            startPosition,
 405 //            withoutUnicodeBuffer,
 406 //            1,
 407 //            withoutUnicodePtr);
 408 //        }
 409 //        //fill the buffer with the char
 410 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 411 //        return true;
 412 //
 413 //      } //-------------end unicode traitement--------------
 414 //      else {
 415         if (currentCharacter != testedChar) {
 416           currentPosition = temp;
 417           return false;
 418         }
 419         unicodeAsBackSlash = false;
 420 //        if (withoutUnicodePtr != 0)
 421 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 422         return true;
 423 //      }
 424     } catch (IndexOutOfBoundsException e) {
 425       unicodeAsBackSlash = false;
 426       currentPosition = temp;
 427       return false;
 428     }
 429   }
 430   public final int getNextChar(char testedChar1, char testedChar2) {
 431     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 432     //test can be done with (x==0) for the first and (x>0) for the second
 433     //handle the case of unicode.
 434     //when a unicode appears then we must use a buffer that holds char internal values
 435     //At the end of this method currentCharacter holds the new visited char
 436     //and currentPosition points right next after it
 437     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 438     //On false, no side effect has occured.
 439
 440     //ALL getNextChar.... ARE OPTIMIZED COPIES
 441
 442     int temp = currentPosition;
 443     try {
 444       int result;
 445       currentCharacter = source[currentPosition++];
 446 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 447 //        && (source[currentPosition] == 'u')) {
 448 //        //-------------unicode traitement ------------
 449 //        int c1, c2, c3, c4;
 450 //        int unicodeSize = 6;
 451 //        currentPosition++;
 452 //        while (source[currentPosition] == 'u') {
 453 //          currentPosition++;
 454 //          unicodeSize++;
 455 //        }
 456 //
 457 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 458 //          || c1 < 0)
 459 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 460 //            || c2 < 0)
 461 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 462 //            || c3 < 0)
 463 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 464 //            || c4 < 0)) {
 465 //          currentPosition = temp;
 466 //          return 2;
 467 //        }
 468 //
 469 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 470 //        if (currentCharacter == testedChar1)
 471 //          result = 0;
 472 //        else if (currentCharacter == testedChar2)
 473 //          result = 1;
 474 //        else {
 475 //          currentPosition = temp;
 476 //          return -1;
 477 //        }
 478 //
 479 //        //need the unicode buffer
 480 //        if (withoutUnicodePtr == 0) {
 481 //          //buffer all the entries that have been left aside....
 482 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 483 //          System.arraycopy(
 484 //            source,
 485 //            startPosition,
 486 //            withoutUnicodeBuffer,
 487 //            1,
 488 //            withoutUnicodePtr);
 489 //        }
 490 //        //fill the buffer with the char
 491 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 492 //        return result;
 493 //      } //-------------end unicode traitement--------------
 494 //      else {
 495         if (currentCharacter == testedChar1)
 496           result = 0;
 497         else if (currentCharacter == testedChar2)
 498           result = 1;
 499         else {
 500           currentPosition = temp;
 501           return -1;
 502         }
 503
 504 //        if (withoutUnicodePtr != 0)
 505 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 506         return result;
 507  //     }
 508     } catch (IndexOutOfBoundsException e) {
 509       currentPosition = temp;
 510       return -1;
 511     }
 512   }
 513   public final boolean getNextCharAsDigit() {
 514     //BOOLEAN
 515     //handle the case of unicode.
 516     //when a unicode appears then we must use a buffer that holds char internal values
 517     //At the end of this method currentCharacter holds the new visited char
 518     //and currentPosition points right next after it
 519     //Both previous lines are true if the currentCharacter is a digit
 520     //On false, no side effect has occured.
 521
 522     //ALL getNextChar.... ARE OPTIMIZED COPIES
 523
 524     int temp = currentPosition;
 525     try {
 526       currentCharacter = source[currentPosition++];
 527 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 528 //        && (source[currentPosition] == 'u')) {
 529 //        //-------------unicode traitement ------------
 530 //        int c1, c2, c3, c4;
 531 //        int unicodeSize = 6;
 532 //        currentPosition++;
 533 //        while (source[currentPosition] == 'u') {
 534 //          currentPosition++;
 535 //          unicodeSize++;
 536 //        }
 537 //
 538 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 539 //          || c1 < 0)
 540 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 541 //            || c2 < 0)
 542 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 543 //            || c3 < 0)
 544 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 545 //            || c4 < 0)) {
 546 //          currentPosition = temp;
 547 //          return false;
 548 //        }
 549 //
 550 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 551 //        if (!Character.isDigit(currentCharacter)) {
 552 //          currentPosition = temp;
 553 //          return false;
 554 //        }
 555 //
 556 //        //need the unicode buffer
 557 //        if (withoutUnicodePtr == 0) {
 558 //          //buffer all the entries that have been left aside....
 559 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 560 //          System.arraycopy(
 561 //            source,
 562 //            startPosition,
 563 //            withoutUnicodeBuffer,
 564 //            1,
 565 //            withoutUnicodePtr);
 566 //        }
 567 //        //fill the buffer with the char
 568 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 569 //        return true;
 570 //      } //-------------end unicode traitement--------------
 571 //      else {
 572         if (!Character.isDigit(currentCharacter)) {
 573           currentPosition = temp;
 574           return false;
 575         }
 576 //        if (withoutUnicodePtr != 0)
 577 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 578         return true;
 579 //      }
 580     } catch (IndexOutOfBoundsException e) {
 581       currentPosition = temp;
 582       return false;
 583     }
 584   }
 585   public final boolean getNextCharAsDigit(int radix) {
 586     //BOOLEAN
 587     //handle the case of unicode.
 588     //when a unicode appears then we must use a buffer that holds char internal values
 589     //At the end of this method currentCharacter holds the new visited char
 590     //and currentPosition points right next after it
 591     //Both previous lines are true if the currentCharacter is a digit base on radix
 592     //On false, no side effect has occured.
 593
 594     //ALL getNextChar.... ARE OPTIMIZED COPIES
 595
 596     int temp = currentPosition;
 597     try {
 598       currentCharacter = source[currentPosition++];
 599 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 600 //        && (source[currentPosition] == 'u')) {
 601 //        //-------------unicode traitement ------------
 602 //        int c1, c2, c3, c4;
 603 //        int unicodeSize = 6;
 604 //        currentPosition++;
 605 //        while (source[currentPosition] == 'u') {
 606 //          currentPosition++;
 607 //          unicodeSize++;
 608 //        }
 609 //
 610 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 611 //          || c1 < 0)
 612 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 613 //            || c2 < 0)
 614 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 615 //            || c3 < 0)
 616 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 617 //            || c4 < 0)) {
 618 //          currentPosition = temp;
 619 //          return false;
 620 //        }
 621 //
 622 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 623 //        if (Character.digit(currentCharacter, radix) == -1) {
 624 //          currentPosition = temp;
 625 //          return false;
 626 //        }
 627 //
 628 //        //need the unicode buffer
 629 //        if (withoutUnicodePtr == 0) {
 630 //          //buffer all the entries that have been left aside....
 631 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 632 //          System.arraycopy(
 633 //            source,
 634 //            startPosition,
 635 //            withoutUnicodeBuffer,
 636 //            1,
 637 //            withoutUnicodePtr);
 638 //        }
 639 //        //fill the buffer with the char
 640 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 641 //        return true;
 642 //      } //-------------end unicode traitement--------------
 643 //      else {
 644         if (Character.digit(currentCharacter, radix) == -1) {
 645           currentPosition = temp;
 646           return false;
 647         }
 648 //        if (withoutUnicodePtr != 0)
 649 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 650         return true;
 651 //      }
 652     } catch (IndexOutOfBoundsException e) {
 653       currentPosition = temp;
 654       return false;
 655     }
 656   }
 657   public boolean getNextCharAsJavaIdentifierPart() {
 658     //BOOLEAN
 659     //handle the case of unicode.
 660     //when a unicode appears then we must use a buffer that holds char internal values
 661     //At the end of this method currentCharacter holds the new visited char
 662     //and currentPosition points right next after it
 663     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
 664     //On false, no side effect has occured.
 665
 666     //ALL getNextChar.... ARE OPTIMIZED COPIES
 667
 668     int temp = currentPosition;
 669     try {
 670       currentCharacter = source[currentPosition++];
 671 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 672 //        && (source[currentPosition] == 'u')) {
 673 //        //-------------unicode traitement ------------
 674 //        int c1, c2, c3, c4;
 675 //        int unicodeSize = 6;
 676 //        currentPosition++;
 677 //        while (source[currentPosition] == 'u') {
 678 //          currentPosition++;
 679 //          unicodeSize++;
 680 //        }
 681 //
 682 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 683 //          || c1 < 0)
 684 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 685 //            || c2 < 0)
 686 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 687 //            || c3 < 0)
 688 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 689 //            || c4 < 0)) {
 690 //          currentPosition = temp;
 691 //          return false;
 692 //        }
 693 //
 694 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 695 //        if (!isPHPIdentifierPart(currentCharacter)) {
 696 //          currentPosition = temp;
 697 //          return false;
 698 //        }
 699 //
 700 //        //need the unicode buffer
 701 //        if (withoutUnicodePtr == 0) {
 702 //          //buffer all the entries that have been left aside....
 703 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 704 //          System.arraycopy(
 705 //            source,
 706 //            startPosition,
 707 //            withoutUnicodeBuffer,
 708 //            1,
 709 //            withoutUnicodePtr);
 710 //        }
 711 //        //fill the buffer with the char
 712 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 713 //        return true;
 714 //      } //-------------end unicode traitement--------------
 715 //      else {
 716         if (!isPHPIdentifierPart(currentCharacter)) {
 717           currentPosition = temp;
 718           return false;
 719         }
 720
 721 //        if (withoutUnicodePtr != 0)
 722 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 723         return true;
 724 //      }
 725     } catch (IndexOutOfBoundsException e) {
 726       currentPosition = temp;
 727       return false;
 728     }
 729   }
 730
 731   public int getNextToken() throws InvalidInputException {
 732     int htmlPosition = currentPosition;
 733     try {
 734       while (!phpMode) {
 735         currentCharacter = source[currentPosition++];
 736         if (currentCharacter == '<') {
 737           if (getNextChar('?')) {
 738             currentCharacter = source[currentPosition++];
 739             if ((currentCharacter == ' ')
 740               || Character.isWhitespace(currentCharacter)) {
 741               // <?
 742               startPosition = currentPosition;
 743               phpMode = true;
 744               if (tokenizeWhiteSpace) {
 745                 // && (whiteStart != currentPosition - 1)) {
 746                 // reposition scanner in case we are interested by spaces as tokens
 747                 startPosition = htmlPosition;
 748                 return TokenNameHTML;
 749               }
 750             } else {
 751               boolean phpStart =
 752                 (currentCharacter == 'P') || (currentCharacter == 'p');
 753               if (phpStart) {
 754                 int test = getNextChar('H', 'h');
 755                 if (test >= 0) {
 756                   test = getNextChar('P', 'p');
 757                   if (test >= 0) {
 758                     // <?PHP  <?php
 759                     startPosition = currentPosition;
 760                     phpMode = true;
 761
 762                     if (tokenizeWhiteSpace) {
 763                       // && (whiteStart != currentPosition - 1)) {
 764                       // reposition scanner in case we are interested by spaces as tokens
 765                       startPosition = htmlPosition;
 766                       return TokenNameHTML;
 767                     }
 768                   }
 769                 }
 770               }
 771             }
 772           }
 773         }
 774
 775         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 776           if (recordLineSeparator) {
 777             pushLineSeparator();
 778           } else {
 779             currentLine = null;
 780           }
 781         }
 782       }
 783     } //-----------------end switch while try--------------------
 784     catch (IndexOutOfBoundsException e) {
 785       if (tokenizeWhiteSpace) {
 786         // && (whiteStart != currentPosition - 1)) {
 787         // reposition scanner in case we are interested by spaces as tokens
 788         startPosition = htmlPosition;
 789       }
 790       return TokenNameEOF;
 791     }
 792
 793     if (phpMode) {
 794       this.wasAcr = false;
 795       if (diet) {
 796         jumpOverMethodBody();
 797         diet = false;
 798         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 799       }
 800       try {
 801         while (true) { //loop for jumping over comments
 802           withoutUnicodePtr = 0;
 803           //start with a new token (even comment written with unicode )
 804
 805           // ---------Consume white space and handles startPosition---------
 806           int whiteStart = currentPosition;
 807           boolean isWhiteSpace;
 808           do {
 809             startPosition = currentPosition;
 810             currentCharacter = source[currentPosition++];
 811 //            if (((currentCharacter = source[currentPosition++]) == '\\')
 812 //              && (source[currentPosition] == 'u')) {
 813 //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 814 //            } else {
 815               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 816                 checkNonExternalizeString();
 817                 if (recordLineSeparator) {
 818                   pushLineSeparator();
 819                 } else {
 820                   currentLine = null;
 821                 }
 822               }
 823               isWhiteSpace =
 824                 (currentCharacter == ' ')
 825                   || Character.isWhitespace(currentCharacter);
 826 //            }
 827           } while (isWhiteSpace);
 828           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 829             // reposition scanner in case we are interested by spaces as tokens
 830             currentPosition--;
 831             startPosition = whiteStart;
 832             return TokenNameWHITESPACE;
 833           }
 834           //little trick to get out in the middle of a source computation
 835           if (currentPosition > eofPosition)
 836             return TokenNameEOF;
 837
 838           // ---------Identify the next token-------------
 839
 840           switch (currentCharacter) {
 841             case '(' :
 842               return TokenNameLPAREN;
 843             case ')' :
 844               return TokenNameRPAREN;
 845             case '{' :
 846               return TokenNameLBRACE;
 847             case '}' :
 848               return TokenNameRBRACE;
 849             case '[' :
 850               return TokenNameLBRACKET;
 851             case ']' :
 852               return TokenNameRBRACKET;
 853             case ';' :
 854               return TokenNameSEMICOLON;
 855             case ',' :
 856               return TokenNameCOMMA;
 857
 858             case '.' :
 859               if (getNextCharAsDigit())
 860                 return scanNumber(true);
 861               return TokenNameDOT;
 862             case '+' :
 863               {
 864                 int test;
 865                 if ((test = getNextChar('+', '=')) == 0)
 866                   return TokenNamePLUS_PLUS;
 867                 if (test > 0)
 868                   return TokenNamePLUS_EQUAL;
 869                 return TokenNamePLUS;
 870               }
 871             case '-' :
 872               {
 873                 int test;
 874                 if ((test = getNextChar('-', '=')) == 0)
 875                   return TokenNameMINUS_MINUS;
 876                 if (test > 0)
 877                   return TokenNameMINUS_EQUAL;
 878                 if (getNextChar('>'))
 879                   return TokenNameMINUS_GREATER;
 880
 881                 return TokenNameMINUS;
 882               }
 883             case '~' :
 884               if (getNextChar('='))
 885                 return TokenNameTWIDDLE_EQUAL;
 886               return TokenNameTWIDDLE;
 887             case '!' :
 888               if (getNextChar('='))
 889                 return TokenNameNOT_EQUAL;
 890               return TokenNameNOT;
 891             case '*' :
 892               if (getNextChar('='))
 893                 return TokenNameMULTIPLY_EQUAL;
 894               return TokenNameMULTIPLY;
 895             case '%' :
 896               if (getNextChar('='))
 897                 return TokenNameREMAINDER_EQUAL;
 898               return TokenNameREMAINDER;
 899             case '<' :
 900               {
 901                 int test;
 902                 if ((test = getNextChar('=', '<')) == 0)
 903                   return TokenNameLESS_EQUAL;
 904                 if (test > 0) {
 905                   if (getNextChar('='))
 906                     return TokenNameLEFT_SHIFT_EQUAL;
 907                   if (getNextChar('<')) {
 908                     int heredocStart = currentPosition;
 909                     int heredocLength = 0;
 910                     currentCharacter = source[currentPosition++];
 911                     if (isPHPIdentifierStart(currentCharacter)) {
 912                       currentCharacter = source[currentPosition++];
 913                     } else {
 914                       return TokenNameERROR;
 915                     }
 916                     while (isPHPIdentifierPart(currentCharacter)) {
 917                       currentCharacter = source[currentPosition++];
 918                     }
 919
 920                     heredocLength = currentPosition - heredocStart - 1;
 921
 922                     // heredoc end-tag determination
 923                     boolean endTag = true;
 924                     char ch;
 925                     do {
 926                       ch = source[currentPosition++];
 927                       if (ch == '\r' || ch == '\n') {
 928                         if (recordLineSeparator) {
 929                           pushLineSeparator();
 930                         } else {
 931                           currentLine = null;
 932                         }
 933                         for (int i = 0; i < heredocLength; i++) {
 934                           if (source[currentPosition + i]
 935                             != source[heredocStart + i]) {
 936                             endTag = false;
 937                             break;
 938                           }
 939                         }
 940                         if (endTag) {
 941                           currentPosition += heredocLength - 1;
 942                           currentCharacter = source[currentPosition++];
 943                           break; // do...while loop
 944                         } else {
 945                           endTag = true;
 946                         }
 947                       }
 948
 949                     } while (true);
 950
 951                     return TokenNameHEREDOC;
 952                   }
 953                   return TokenNameLEFT_SHIFT;
 954                 }
 955                 return TokenNameLESS;
 956               }
 957             case '>' :
 958               {
 959                 int test;
 960                 if ((test = getNextChar('=', '>')) == 0)
 961                   return TokenNameGREATER_EQUAL;
 962                 if (test > 0) {
 963                   if ((test = getNextChar('=', '>')) == 0)
 964                     return TokenNameRIGHT_SHIFT_EQUAL;
 965                   return TokenNameRIGHT_SHIFT;
 966                 }
 967                 return TokenNameGREATER;
 968               }
 969             case '=' :
 970               if (getNextChar('='))
 971                 return TokenNameEQUAL_EQUAL;
 972               if (getNextChar('>'))
 973                 return TokenNameEQUAL_GREATER;
 974               return TokenNameEQUAL;
 975             case '&' :
 976               {
 977                 int test;
 978                 if ((test = getNextChar('&', '=')) == 0)
 979                   return TokenNameAND_AND;
 980                 if (test > 0)
 981                   return TokenNameAND_EQUAL;
 982                 return TokenNameAND;
 983               }
 984             case '|' :
 985               {
 986                 int test;
 987                 if ((test = getNextChar('|', '=')) == 0)
 988                   return TokenNameOR_OR;
 989                 if (test > 0)
 990                   return TokenNameOR_EQUAL;
 991                 return TokenNameOR;
 992               }
 993             case '^' :
 994               if (getNextChar('='))
 995                 return TokenNameXOR_EQUAL;
 996               return TokenNameXOR;
 997             case '?' :
 998               if (getNextChar('>')) {
 999                 phpMode = false;
1000                 return TokenNameStopPHP;
1001               }
1002               return TokenNameQUESTION;
1003             case ':' :
1004               if (getNextChar(':'))
1005                 return TokenNameCOLON_COLON;
1006               return TokenNameCOLON;
1007             case '@' :
1008               return TokenNameAT;
1009               //                                        case '\'' :
1010               //                                                {
1011               //                                                        int test;
1012               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1013               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1014               //                                                        }
1015               //                                                        if (test > 0) {
1016               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1017               //                                                                for (int lookAhead = 0;
1018               //                                                                        lookAhead < 3;
1019               //                                                                        lookAhead++) {
1020               //                                                                        if (currentPosition + lookAhead
1021               //                                                                                == source.length)
1022               //                                                                                break;
1023               //                                                                        if (source[currentPosition + lookAhead]
1024               //                                                                                == '\n')
1025               //                                                                                break;
1026               //                                                                        if (source[currentPosition + lookAhead]
1027               //                                                                                == '\'') {
1028               //                                                                                currentPosition += lookAhead + 1;
1029               //                                                                                break;
1030               //                                                                        }
1031               //                                                                }
1032               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1033               //                                                        }
1034               //                                                }
1035               //                                                if (getNextChar('\'')) {
1036               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037               //                                                        for (int lookAhead = 0;
1038               //                                                                lookAhead < 3;
1039               //                                                                lookAhead++) {
1040               //                                                                if (currentPosition + lookAhead
1041               //                                                                        == source.length)
1042               //                                                                        break;
1043               //                                                                if (source[currentPosition + lookAhead]
1044               //                                                                        == '\n')
1045               //                                                                        break;
1046               //                                                                if (source[currentPosition + lookAhead]
1047               //                                                                        == '\'') {
1048               //                                                                        currentPosition += lookAhead + 1;
1049               //                                                                        break;
1050               //                                                                }
1051               //                                                        }
1052               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1053               //                                                }
1054               //                                                if (getNextChar('\\'))
1055               //                                                        scanEscapeCharacter();
1056               //                                                else { // consume next character
1057               //                                                        unicodeAsBackSlash = false;
1058               //                                                        if (((currentCharacter = source[currentPosition++])
1059               //                                                                == '\\')
1060               //                                                                && (source[currentPosition] == 'u')) {
1061               //                                                                getNextUnicodeChar();
1062               //                                                        } else {
1063               //                                                                if (withoutUnicodePtr != 0) {
1064               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1065               //                                                                                currentCharacter;
1066               //                                                                }
1067               //                                                        }
1068               //                                                }
1069               //                                                //            if (getNextChar('\''))
1070               //                                                //              return TokenNameCharacterLiteral;
1071               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1072               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1073               //                                                        if (currentPosition + lookAhead == source.length)
1074               //                                                                break;
1075               //                                                        if (source[currentPosition + lookAhead] == '\n')
1076               //                                                                break;
1077               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1078               //                                                                currentPosition += lookAhead + 1;
1079               //                                                                break;
1080               //                                                        }
1081               //                                                }
1082               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1083             case '\'' :
1084               try {
1085                 // consume next character
1086                 unicodeAsBackSlash = false;
1087                 currentCharacter = source[currentPosition++];
1088 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1089 //                  && (source[currentPosition] == 'u')) {
1090 //                  getNextUnicodeChar();
1091 //                } else {
1092 //                  if (withoutUnicodePtr != 0) {
1093 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1094 //                      currentCharacter;
1095 //                  }
1096 //                }
1097
1098                 while (currentCharacter != '\'') {
1099
1100                   /**** in PHP \r and \n are valid in string literals ****/
1101                   //                  if ((currentCharacter == '\n')
1102                   //                    || (currentCharacter == '\r')) {
1103                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1104                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1105                   //                      if (currentPosition + lookAhead == source.length)
1106                   //                        break;
1107                   //                      if (source[currentPosition + lookAhead] == '\n')
1108                   //                        break;
1109                   //                      if (source[currentPosition + lookAhead] == '\"') {
1110                   //                        currentPosition += lookAhead + 1;
1111                   //                        break;
1112                   //                      }
1113                   //                    }
1114                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1115                   //                  }
1116                   if (currentCharacter == '\\') {
1117                     int escapeSize = currentPosition;
1118                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1119                     //scanSingleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1120                     scanSingleQuotedEscapeCharacter();
1121                     escapeSize = currentPosition - escapeSize;
1122                     if (withoutUnicodePtr == 0) {
1123                       //buffer all the entries that have been left aside....
1124                       withoutUnicodePtr =
1125                         currentPosition - escapeSize - 1 - startPosition;
1126                       System.arraycopy(
1127                         source,
1128                         startPosition,
1129                         withoutUnicodeBuffer,
1130                         1,
1131                         withoutUnicodePtr);
1132                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1133                         currentCharacter;
1134                     } else { //overwrite the / in the buffer
1135                       withoutUnicodeBuffer[withoutUnicodePtr] =
1136                         currentCharacter;
1137                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1138                         withoutUnicodePtr--;
1139                       }
1140                     }
1141                   }
1142                   // consume next character
1143                   unicodeAsBackSlash = false;
1144                   currentCharacter = source[currentPosition++];
1145 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1146 //                    && (source[currentPosition] == 'u')) {
1147 //                    getNextUnicodeChar();
1148 //                  } else {
1149                     if (withoutUnicodePtr != 0) {
1150                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1151                         currentCharacter;
1152                     }
1153 //                  }
1154
1155                 }
1156               } catch (IndexOutOfBoundsException e) {
1157                 throw new InvalidInputException(UNTERMINATED_STRING);
1158               } catch (InvalidInputException e) {
1159                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1160                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1161                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1162                     if (currentPosition + lookAhead == source.length)
1163                       break;
1164                     if (source[currentPosition + lookAhead] == '\n')
1165                       break;
1166                     if (source[currentPosition + lookAhead] == '\'') {
1167                       currentPosition += lookAhead + 1;
1168                       break;
1169                     }
1170                   }
1171
1172                 }
1173                 throw e; // rethrow
1174               }
1175               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1176                 if (currentLine == null) {
1177                   currentLine = new NLSLine();
1178                   lines.add(currentLine);
1179                 }
1180                 currentLine.add(
1181                   new StringLiteral(
1182                     getCurrentTokenSourceString(),
1183                     startPosition,
1184                     currentPosition - 1));
1185               }
1186               return TokenNameStringConstant;
1187             case '"' :
1188               try {
1189                 // consume next character
1190                 unicodeAsBackSlash = false;
1191                 currentCharacter = source[currentPosition++];
1192 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1193 //                  && (source[currentPosition] == 'u')) {
1194 //                  getNextUnicodeChar();
1195 //                } else {
1196 //                  if (withoutUnicodePtr != 0) {
1197 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1198 //                      currentCharacter;
1199 //                  }
1200 //                }
1201
1202                 while (currentCharacter != '"') {
1203
1204                   /**** in PHP \r and \n are valid in string literals ****/
1205                   //                  if ((currentCharacter == '\n')
1206                   //                    || (currentCharacter == '\r')) {
1207                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209                   //                      if (currentPosition + lookAhead == source.length)
1210                   //                        break;
1211                   //                      if (source[currentPosition + lookAhead] == '\n')
1212                   //                        break;
1213                   //                      if (source[currentPosition + lookAhead] == '\"') {
1214                   //                        currentPosition += lookAhead + 1;
1215                   //                        break;
1216                   //                      }
1217                   //                    }
1218                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1219                   //                  }
1220                   if (currentCharacter == '\\') {
1221                     int escapeSize = currentPosition;
1222                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1223                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1224                     scanDoubleQuotedEscapeCharacter();
1225                     escapeSize = currentPosition - escapeSize;
1226                     if (withoutUnicodePtr == 0) {
1227                       //buffer all the entries that have been left aside....
1228                       withoutUnicodePtr =
1229                         currentPosition - escapeSize - 1 - startPosition;
1230                       System.arraycopy(
1231                         source,
1232                         startPosition,
1233                         withoutUnicodeBuffer,
1234                         1,
1235                         withoutUnicodePtr);
1236                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1237                         currentCharacter;
1238                     } else { //overwrite the / in the buffer
1239                       withoutUnicodeBuffer[withoutUnicodePtr] =
1240                         currentCharacter;
1241                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1242                         withoutUnicodePtr--;
1243                       }
1244                     }
1245                   }
1246                   // consume next character
1247                   unicodeAsBackSlash = false;
1248                   currentCharacter = source[currentPosition++];
1249 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1250 //                    && (source[currentPosition] == 'u')) {
1251 //                    getNextUnicodeChar();
1252 //                  } else {
1253                     if (withoutUnicodePtr != 0) {
1254                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1255                         currentCharacter;
1256                     }
1257 //                  }
1258
1259                 }
1260               } catch (IndexOutOfBoundsException e) {
1261                 throw new InvalidInputException(UNTERMINATED_STRING);
1262               } catch (InvalidInputException e) {
1263                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1264                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1265                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1266                     if (currentPosition + lookAhead == source.length)
1267                       break;
1268                     if (source[currentPosition + lookAhead] == '\n')
1269                       break;
1270                     if (source[currentPosition + lookAhead] == '\"') {
1271                       currentPosition += lookAhead + 1;
1272                       break;
1273                     }
1274                   }
1275
1276                 }
1277                 throw e; // rethrow
1278               }
1279               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1280                 if (currentLine == null) {
1281                   currentLine = new NLSLine();
1282                   lines.add(currentLine);
1283                 }
1284                 currentLine.add(
1285                   new StringLiteral(
1286                     getCurrentTokenSourceString(),
1287                     startPosition,
1288                     currentPosition - 1));
1289               }
1290               return TokenNameStringLiteral;
1291             case '`' :
1292               try {
1293                 // consume next character
1294                 unicodeAsBackSlash = false;
1295                 currentCharacter = source[currentPosition++];
1296 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1297 //                  && (source[currentPosition] == 'u')) {
1298 //                  getNextUnicodeChar();
1299 //                } else {
1300 //                  if (withoutUnicodePtr != 0) {
1301 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1302 //                      currentCharacter;
1303 //                  }
1304 //                }
1305
1306                 while (currentCharacter != '`') {
1307
1308                   /**** in PHP \r and \n are valid in string literals ****/
1309                   //                if ((currentCharacter == '\n')
1310                   //                  || (currentCharacter == '\r')) {
1311                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1312                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1313                   //                    if (currentPosition + lookAhead == source.length)
1314                   //                      break;
1315                   //                    if (source[currentPosition + lookAhead] == '\n')
1316                   //                      break;
1317                   //                    if (source[currentPosition + lookAhead] == '\"') {
1318                   //                      currentPosition += lookAhead + 1;
1319                   //                      break;
1320                   //                    }
1321                   //                  }
1322                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1323                   //                }
1324                   if (currentCharacter == '\\') {
1325                     int escapeSize = currentPosition;
1326                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1327                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and the previous value is needed a few lines below
1328                     scanDoubleQuotedEscapeCharacter();
1329                     escapeSize = currentPosition - escapeSize;
1330                     if (withoutUnicodePtr == 0) {
1331                       //buffer all the entries that have been left aside....
1332                       withoutUnicodePtr =
1333                         currentPosition - escapeSize - 1 - startPosition;
1334                       System.arraycopy(
1335                         source,
1336                         startPosition,
1337                         withoutUnicodeBuffer,
1338                         1,
1339                         withoutUnicodePtr);
1340                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1341                         currentCharacter;
1342                     } else { //overwrite the / in the buffer
1343                       withoutUnicodeBuffer[withoutUnicodePtr] =
1344                         currentCharacter;
1345                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1346                         withoutUnicodePtr--;
1347                       }
1348                     }
1349                   }
1350                   // consume next character
1351                   unicodeAsBackSlash = false;
1352                   currentCharacter = source[currentPosition++];
1353 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1354 //                    && (source[currentPosition] == 'u')) {
1355 //                    getNextUnicodeChar();
1356 //                  } else {
1357                     if (withoutUnicodePtr != 0) {
1358                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1359                         currentCharacter;
1360                     }
1361 //                  }
1362
1363                 }
1364               } catch (IndexOutOfBoundsException e) {
1365                 throw new InvalidInputException(UNTERMINATED_STRING);
1366               } catch (InvalidInputException e) {
1367                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1368                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1369                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1370                     if (currentPosition + lookAhead == source.length)
1371                       break;
1372                     if (source[currentPosition + lookAhead] == '\n')
1373                       break;
1374                     if (source[currentPosition + lookAhead] == '`') {
1375                       currentPosition += lookAhead + 1;
1376                       break;
1377                     }
1378                   }
1379
1380                 }
1381                 throw e; // rethrow
1382               }
1383               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1384                 if (currentLine == null) {
1385                   currentLine = new NLSLine();
1386                   lines.add(currentLine);
1387                 }
1388                 currentLine.add(
1389                   new StringLiteral(
1390                     getCurrentTokenSourceString(),
1391                     startPosition,
1392                     currentPosition - 1));
1393               }
1394               return TokenNameStringInterpolated;
1395             case '#' :
1396             case '/' :
1397               {
1398                 int test;
1399                 if ((currentCharacter == '#')
1400                   || (test = getNextChar('/', '*')) == 0) {
1401                   //line comment
1402                   int endPositionForLineComment = 0;
1403                   try { //get the next char
1404                     currentCharacter = source[currentPosition++];
1405 //                    if (((currentCharacter = source[currentPosition++])
1406 //                      == '\\')
1407 //                      && (source[currentPosition] == 'u')) {
1408 //                      //-------------unicode traitement ------------
1409 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1410 //                      currentPosition++;
1411 //                      while (source[currentPosition] == 'u') {
1412 //                        currentPosition++;
1413 //                      }
1414 //                      if ((c1 =
1415 //                        Character.getNumericValue(source[currentPosition++]))
1416 //                        > 15
1417 //                        || c1 < 0
1418 //                        || (c2 =
1419 //                          Character.getNumericValue(source[currentPosition++]))
1420 //                          > 15
1421 //                        || c2 < 0
1422 //                        || (c3 =
1423 //                          Character.getNumericValue(source[currentPosition++]))
1424 //                          > 15
1425 //                        || c3 < 0
1426 //                        || (c4 =
1427 //                          Character.getNumericValue(source[currentPosition++]))
1428 //                          > 15
1429 //                        || c4 < 0) {
1430 //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1431 //                      } else {
1432 //                        currentCharacter =
1433 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1434 //                      }
1435 //                    }
1436
1437                     //handle the \\u case manually into comment
1438 //                    if (currentCharacter == '\\') {
1439 //                      if (source[currentPosition] == '\\')
1440 //                        currentPosition++;
1441 //                    } //jump over the \\
1442                     boolean isUnicode = false;
1443                     while (currentCharacter != '\r'
1444                       && currentCharacter != '\n') {
1445                       if (currentCharacter == '?') {
1446                         if (getNextChar('>')) {
1447                           startPosition = currentPosition - 2;
1448                           phpMode = false;
1449                           return TokenNameStopPHP;
1450                         }
1451                       }
1452
1453                       //get the next char
1454                       isUnicode = false;
1455                       currentCharacter = source[currentPosition++];
1456 //                      if (((currentCharacter = source[currentPosition++])
1457 //                        == '\\')
1458 //                        && (source[currentPosition] == 'u')) {
1459 //                        isUnicode = true;
1460 //                        //-------------unicode traitement ------------
1461 //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1462 //                        currentPosition++;
1463 //                        while (source[currentPosition] == 'u') {
1464 //                          currentPosition++;
1465 //                        }
1466 //                        if ((c1 =
1467 //                          Character.getNumericValue(source[currentPosition++]))
1468 //                          > 15
1469 //                          || c1 < 0
1470 //                          || (c2 =
1471 //                            Character.getNumericValue(
1472 //                              source[currentPosition++]))
1473 //                            > 15
1474 //                          || c2 < 0
1475 //                          || (c3 =
1476 //                            Character.getNumericValue(
1477 //                              source[currentPosition++]))
1478 //                            > 15
1479 //                          || c3 < 0
1480 //                          || (c4 =
1481 //                            Character.getNumericValue(
1482 //                              source[currentPosition++]))
1483 //                            > 15
1484 //                          || c4 < 0) {
1485 //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1486 //                        } else {
1487 //                          currentCharacter =
1488 //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1489 //                        }
1490 //                      }
1491                       //handle the \\u case manually into comment
1492 //                      if (currentCharacter == '\\') {
1493 //                        if (source[currentPosition] == '\\')
1494 //                          currentPosition++;
1495 //                      } //jump over the \\
1496                     }
1497                     if (isUnicode) {
1498                       endPositionForLineComment = currentPosition - 6;
1499                     } else {
1500                       endPositionForLineComment = currentPosition - 1;
1501                     }
1502                     recordComment(false);
1503                     if ((currentCharacter == '\r')
1504                       || (currentCharacter == '\n')) {
1505                       checkNonExternalizeString();
1506                       if (recordLineSeparator) {
1507                         if (isUnicode) {
1508                           pushUnicodeLineSeparator();
1509                         } else {
1510                           pushLineSeparator();
1511                         }
1512                       } else {
1513                         currentLine = null;
1514                       }
1515                     }
1516                     if (tokenizeComments) {
1517                       if (!isUnicode) {
1518                         currentPosition = endPositionForLineComment;
1519                         // reset one character behind
1520                       }
1521                       return TokenNameCOMMENT_LINE;
1522                     }
1523                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1524                     if (tokenizeComments) {
1525                       currentPosition--;
1526                       // reset one character behind
1527                       return TokenNameCOMMENT_LINE;
1528                     }
1529                   }
1530                   break;
1531                 }
1532                 if (test > 0) {
1533                   //traditional and annotation comment
1534                   boolean isJavadoc = false, star = false;
1535                   // consume next character
1536                   unicodeAsBackSlash = false;
1537                   currentCharacter = source[currentPosition++];
1538 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1539 //                    && (source[currentPosition] == 'u')) {
1540 //                    getNextUnicodeChar();
1541 //                  } else {
1542 //                    if (withoutUnicodePtr != 0) {
1543 //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1544 //                        currentCharacter;
1545 //                    }
1546 //                  }
1547
1548                   if (currentCharacter == '*') {
1549                     isJavadoc = true;
1550                     star = true;
1551                   }
1552                   if ((currentCharacter == '\r')
1553                     || (currentCharacter == '\n')) {
1554                     checkNonExternalizeString();
1555                     if (recordLineSeparator) {
1556                       pushLineSeparator();
1557                     } else {
1558                       currentLine = null;
1559                     }
1560                   }
1561                   try { //get the next char
1562                     currentCharacter = source[currentPosition++];
1563 //                    if (((currentCharacter = source[currentPosition++])
1564 //                      == '\\')
1565 //                      && (source[currentPosition] == 'u')) {
1566 //                      //-------------unicode traitement ------------
1567 //                      getNextUnicodeChar();
1568 //                    }
1569                     //handle the \\u case manually into comment
1570 //                    if (currentCharacter == '\\') {
1571 //                      if (source[currentPosition] == '\\')
1572 //                        currentPosition++;
1573 //                      //jump over the \\
1574 //                    }
1575                     // empty comment is not a javadoc /**/
1576                     if (currentCharacter == '/') {
1577                       isJavadoc = false;
1578                     }
1579                     //loop until end of comment */
1580                     while ((currentCharacter != '/') || (!star)) {
1581                       if ((currentCharacter == '\r')
1582                         || (currentCharacter == '\n')) {
1583                         checkNonExternalizeString();
1584                         if (recordLineSeparator) {
1585                           pushLineSeparator();
1586                         } else {
1587                           currentLine = null;
1588                         }
1589                       }
1590                       star = currentCharacter == '*';
1591                       //get next char
1592                       currentCharacter = source[currentPosition++];
1593 //                      if (((currentCharacter = source[currentPosition++])
1594 //                        == '\\')
1595 //                        && (source[currentPosition] == 'u')) {
1596 //                        //-------------unicode traitement ------------
1597 //                        getNextUnicodeChar();
1598 //                      }
1599                       //handle the \\u case manually into comment
1600 //                      if (currentCharacter == '\\') {
1601 //                        if (source[currentPosition] == '\\')
1602 //                          currentPosition++;
1603 //                      } //jump over the \\
1604                     }
1605                     recordComment(isJavadoc);
1606                     if (tokenizeComments) {
1607                       if (isJavadoc)
1608                         return TokenNameCOMMENT_PHPDOC;
1609                       return TokenNameCOMMENT_BLOCK;
1610                     }
1611                   } catch (IndexOutOfBoundsException e) {
1612                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1613                   }
1614                   break;
1615                 }
1616                 if (getNextChar('='))
1617                   return TokenNameDIVIDE_EQUAL;
1618                 return TokenNameDIVIDE;
1619               }
1620             case '\u001a' :
1621               if (atEnd())
1622                 return TokenNameEOF;
1623               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1624               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1625
1626             default :
1627               if (currentCharacter == '$') {
1628                 while ((currentCharacter = source[currentPosition++]) == '$') {
1629                 }
1630                 if (currentCharacter == '{')
1631                   return TokenNameDOLLAR_LBRACE;
1632                 if (isPHPIdentifierStart(currentCharacter))
1633                   return scanIdentifierOrKeyword(true);
1634                 return TokenNameERROR;
1635               }
1636               if (isPHPIdentifierStart(currentCharacter))
1637                 return scanIdentifierOrKeyword(false);
1638               if (Character.isDigit(currentCharacter))
1639                 return scanNumber(false);
1640               return TokenNameERROR;
1641           }
1642         }
1643       } //-----------------end switch while try--------------------
1644       catch (IndexOutOfBoundsException e) {
1645       }
1646     }
1647     return TokenNameEOF;
1648   }
1649
1650 //  public final void getNextUnicodeChar()
1651 //    throws IndexOutOfBoundsException, InvalidInputException {
1652 //    //VOID
1653 //    //handle the case of unicode.
1654 //    //when a unicode appears then we must use a buffer that holds char internal values
1655 //    //At the end of this method currentCharacter holds the new visited char
1656 //    //and currentPosition points right next after it
1657 //
1658 //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1659 //
1660 //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1661 //    currentPosition++;
1662 //    while (source[currentPosition] == 'u') {
1663 //      currentPosition++;
1664 //      unicodeSize++;
1665 //    }
1666 //
1667 //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1668 //      || c1 < 0
1669 //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1670 //      || c2 < 0
1671 //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1672 //      || c3 < 0
1673 //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1674 //      || c4 < 0) {
1675 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1676 //    } else {
1677 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1678 //      //need the unicode buffer
1679 //      if (withoutUnicodePtr == 0) {
1680 //        //buffer all the entries that have been left aside....
1681 //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1682 //        System.arraycopy(
1683 //          source,
1684 //          startPosition,
1685 //          withoutUnicodeBuffer,
1686 //          1,
1687 //          withoutUnicodePtr);
1688 //      }
1689 //      //fill the buffer with the char
1690 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1691 //    }
1692 //    unicodeAsBackSlash = currentCharacter == '\\';
1693 //  }
1694   /* Tokenize a method body, assuming that curly brackets are properly balanced.
1695    */
1696   public final void jumpOverMethodBody() {
1697
1698     this.wasAcr = false;
1699     int found = 1;
1700     try {
1701       while (true) { //loop for jumping over comments
1702         // ---------Consume white space and handles startPosition---------
1703         boolean isWhiteSpace;
1704         do {
1705           startPosition = currentPosition;
1706           currentCharacter = source[currentPosition++];
1707 //          if (((currentCharacter = source[currentPosition++]) == '\\')
1708 //            && (source[currentPosition] == 'u')) {
1709 //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1710 //          } else {
1711             if (recordLineSeparator
1712               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1713               pushLineSeparator();
1714             isWhiteSpace = Character.isWhitespace(currentCharacter);
1715 //          }
1716         } while (isWhiteSpace);
1717
1718         // -------consume token until } is found---------
1719         switch (currentCharacter) {
1720           case '{' :
1721             found++;
1722             break;
1723           case '}' :
1724             found--;
1725             if (found == 0)
1726               return;
1727             break;
1728           case '\'' :
1729             {
1730               boolean test;
1731               test = getNextChar('\\');
1732               if (test) {
1733                 try {
1734                   scanDoubleQuotedEscapeCharacter();
1735                 } catch (InvalidInputException ex) {
1736                 };
1737               } else {
1738 //                try { // consume next character
1739                   unicodeAsBackSlash = false;
1740                   currentCharacter = source[currentPosition++];
1741 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1742 //                    && (source[currentPosition] == 'u')) {
1743 //                    getNextUnicodeChar();
1744 //                  } else {
1745                     if (withoutUnicodePtr != 0) {
1746                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1747                         currentCharacter;
1748                     }
1749 //                  }
1750 //                } catch (InvalidInputException ex) {
1751 //                };
1752               }
1753               getNextChar('\'');
1754               break;
1755             }
1756           case '"' :
1757             try {
1758 //              try { // consume next character
1759                 unicodeAsBackSlash = false;
1760                 currentCharacter = source[currentPosition++];
1761 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1762 //                  && (source[currentPosition] == 'u')) {
1763 //                  getNextUnicodeChar();
1764 //                } else {
1765                   if (withoutUnicodePtr != 0) {
1766                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1767                       currentCharacter;
1768                   }
1769 //                }
1770 //              } catch (InvalidInputException ex) {
1771 //              };
1772               while (currentCharacter != '"') {
1773                 if (currentCharacter == '\r') {
1774                   if (source[currentPosition] == '\n')
1775                     currentPosition++;
1776                   break;
1777                   // the string cannot go further that the line
1778                 }
1779                 if (currentCharacter == '\n') {
1780                   break;
1781                   // the string cannot go further that the line
1782                 }
1783                 if (currentCharacter == '\\') {
1784                   try {
1785                     scanDoubleQuotedEscapeCharacter();
1786                   } catch (InvalidInputException ex) {
1787                   };
1788                 }
1789 //                try { // consume next character
1790                   unicodeAsBackSlash = false;
1791                   currentCharacter = source[currentPosition++];
1792 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1793 //                    && (source[currentPosition] == 'u')) {
1794 //                    getNextUnicodeChar();
1795 //                  } else {
1796                     if (withoutUnicodePtr != 0) {
1797                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1798                         currentCharacter;
1799                     }
1800 //                  }
1801 //                } catch (InvalidInputException ex) {
1802 //                };
1803               }
1804             } catch (IndexOutOfBoundsException e) {
1805               return;
1806             }
1807             break;
1808           case '/' :
1809             {
1810               int test;
1811               if ((test = getNextChar('/', '*')) == 0) {
1812                 //line comment
1813                 try {
1814                   //get the next char
1815                   currentCharacter = source[currentPosition++];
1816 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1817 //                    && (source[currentPosition] == 'u')) {
1818 //                    //-------------unicode traitement ------------
1819 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1820 //                    currentPosition++;
1821 //                    while (source[currentPosition] == 'u') {
1822 //                      currentPosition++;
1823 //                    }
1824 //                    if ((c1 =
1825 //                      Character.getNumericValue(source[currentPosition++]))
1826 //                      > 15
1827 //                      || c1 < 0
1828 //                      || (c2 =
1829 //                        Character.getNumericValue(source[currentPosition++]))
1830 //                        > 15
1831 //                      || c2 < 0
1832 //                      || (c3 =
1833 //                        Character.getNumericValue(source[currentPosition++]))
1834 //                        > 15
1835 //                      || c3 < 0
1836 //                      || (c4 =
1837 //                        Character.getNumericValue(source[currentPosition++]))
1838 //                        > 15
1839 //                      || c4 < 0) {
1840 //                      //error don't care of the value
1841 //                      currentCharacter = 'A';
1842 //                    } //something different from \n and \r
1843 //                    else {
1844 //                      currentCharacter =
1845 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1846 //                    }
1847 //                  }
1848
1849                   while (currentCharacter != '\r'
1850                     && currentCharacter != '\n') {
1851                     //get the next char
1852                     currentCharacter = source[currentPosition++];
1853 //                    if (((currentCharacter = source[currentPosition++])
1854 //                      == '\\')
1855 //                      && (source[currentPosition] == 'u')) {
1856 //                      //-------------unicode traitement ------------
1857 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1858 //                      currentPosition++;
1859 //                      while (source[currentPosition] == 'u') {
1860 //                        currentPosition++;
1861 //                      }
1862 //                      if ((c1 =
1863 //                        Character.getNumericValue(source[currentPosition++]))
1864 //                        > 15
1865 //                        || c1 < 0
1866 //                        || (c2 =
1867 //                          Character.getNumericValue(source[currentPosition++]))
1868 //                          > 15
1869 //                        || c2 < 0
1870 //                        || (c3 =
1871 //                          Character.getNumericValue(source[currentPosition++]))
1872 //                          > 15
1873 //                        || c3 < 0
1874 //                        || (c4 =
1875 //                          Character.getNumericValue(source[currentPosition++]))
1876 //                          > 15
1877 //                        || c4 < 0) {
1878 //                        //error don't care of the value
1879 //                        currentCharacter = 'A';
1880 //                      } //something different from \n and \r
1881 //                      else {
1882 //                        currentCharacter =
1883 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1884 //                      }
1885 //                    }
1886                   }
1887                   if (recordLineSeparator
1888                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1889                     pushLineSeparator();
1890                 } catch (IndexOutOfBoundsException e) {
1891                 } //an eof will them be generated
1892                 break;
1893               }
1894               if (test > 0) {
1895                 //traditional and annotation comment
1896                 boolean star = false;
1897 //                try { // consume next character
1898                   unicodeAsBackSlash = false;
1899                   currentCharacter = source[currentPosition++];
1900 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1901 //                    && (source[currentPosition] == 'u')) {
1902 //                    getNextUnicodeChar();
1903 //                  } else {
1904                     if (withoutUnicodePtr != 0) {
1905                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1906                         currentCharacter;
1907                     }
1908 //                  };
1909 //                } catch (InvalidInputException ex) {
1910 //                };
1911                 if (currentCharacter == '*') {
1912                   star = true;
1913                 }
1914                 if (recordLineSeparator
1915                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1916                   pushLineSeparator();
1917                 try { //get the next char
1918                   currentCharacter = source[currentPosition++];
1919 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1920 //                    && (source[currentPosition] == 'u')) {
1921 //                    //-------------unicode traitement ------------
1922 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1923 //                    currentPosition++;
1924 //                    while (source[currentPosition] == 'u') {
1925 //                      currentPosition++;
1926 //                    }
1927 //                    if ((c1 =
1928 //                      Character.getNumericValue(source[currentPosition++]))
1929 //                      > 15
1930 //                      || c1 < 0
1931 //                      || (c2 =
1932 //                        Character.getNumericValue(source[currentPosition++]))
1933 //                        > 15
1934 //                      || c2 < 0
1935 //                      || (c3 =
1936 //                        Character.getNumericValue(source[currentPosition++]))
1937 //                        > 15
1938 //                      || c3 < 0
1939 //                      || (c4 =
1940 //                        Character.getNumericValue(source[currentPosition++]))
1941 //                        > 15
1942 //                      || c4 < 0) {
1943 //                      //error don't care of the value
1944 //                      currentCharacter = 'A';
1945 //                    } //something different from * and /
1946 //                    else {
1947 //                      currentCharacter =
1948 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1949 //                    }
1950 //                  }
1951                   //loop until end of comment */
1952                   while ((currentCharacter != '/') || (!star)) {
1953                     if (recordLineSeparator
1954                       && ((currentCharacter == '\r')
1955                         || (currentCharacter == '\n')))
1956                       pushLineSeparator();
1957                     star = currentCharacter == '*';
1958                     //get next char
1959                     currentCharacter = source[currentPosition++];
1960 //                    if (((currentCharacter = source[currentPosition++])
1961 //                      == '\\')
1962 //                      && (source[currentPosition] == 'u')) {
1963 //                      //-------------unicode traitement ------------
1964 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1965 //                      currentPosition++;
1966 //                      while (source[currentPosition] == 'u') {
1967 //                        currentPosition++;
1968 //                      }
1969 //                      if ((c1 =
1970 //                        Character.getNumericValue(source[currentPosition++]))
1971 //                        > 15
1972 //                        || c1 < 0
1973 //                        || (c2 =
1974 //                          Character.getNumericValue(source[currentPosition++]))
1975 //                          > 15
1976 //                        || c2 < 0
1977 //                        || (c3 =
1978 //                          Character.getNumericValue(source[currentPosition++]))
1979 //                          > 15
1980 //                        || c3 < 0
1981 //                        || (c4 =
1982 //                          Character.getNumericValue(source[currentPosition++]))
1983 //                          > 15
1984 //                        || c4 < 0) {
1985 //                        //error don't care of the value
1986 //                        currentCharacter = 'A';
1987 //                      } //something different from * and /
1988 //                      else {
1989 //                        currentCharacter =
1990 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1991 //                      }
1992 //                    }
1993                   }
1994                 } catch (IndexOutOfBoundsException e) {
1995                   return;
1996                 }
1997                 break;
1998               }
1999               break;
2000             }
2001
2002           default :
2003             if (isPHPIdentifierStart(currentCharacter)
2004               || currentCharacter == '$') {
2005               try {
2006                 scanIdentifierOrKeyword((currentCharacter == '$'));
2007               } catch (InvalidInputException ex) {
2008               };
2009               break;
2010             }
2011             if (Character.isDigit(currentCharacter)) {
2012               try {
2013                 scanNumber(false);
2014               } catch (InvalidInputException ex) {
2015               };
2016               break;
2017             }
2018         }
2019       }
2020       //-----------------end switch while try--------------------
2021     } catch (IndexOutOfBoundsException e) {
2022     } catch (InvalidInputException e) {
2023     }
2024     return;
2025   }
2026 //  public final boolean jumpOverUnicodeWhiteSpace()
2027 //    throws InvalidInputException {
2028 //    //BOOLEAN
2029 //    //handle the case of unicode. Jump over the next whiteSpace
2030 //    //making startPosition pointing on the next available char
2031 //    //On false, the currentCharacter is filled up with a potential
2032 //    //correct char
2033 //
2034 //    try {
2035 //      this.wasAcr = false;
2036 //      int c1, c2, c3, c4;
2037 //      int unicodeSize = 6;
2038 //      currentPosition++;
2039 //      while (source[currentPosition] == 'u') {
2040 //        currentPosition++;
2041 //        unicodeSize++;
2042 //      }
2043 //
2044 //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2045 //        || c1 < 0)
2046 //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2047 //          || c2 < 0)
2048 //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2049 //          || c3 < 0)
2050 //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2051 //          || c4 < 0)) {
2052 //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2053 //      }
2054 //
2055 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2056 //      if (recordLineSeparator
2057 //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2058 //        pushLineSeparator();
2059 //      if (Character.isWhitespace(currentCharacter))
2060 //        return true;
2061 //
2062 //      //buffer the new char which is not a white space
2063 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2064 //      //withoutUnicodePtr == 1 is true here
2065 //      return false;
2066 //    } catch (IndexOutOfBoundsException e) {
2067 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2068 //    }
2069 //  }
2070   public final int[] getLineEnds() {
2071     //return a bounded copy of this.lineEnds
2072
2073     int[] copy;
2074     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2075     return copy;
2076   }
2077
2078   public char[] getSource() {
2079     return this.source;
2080   }
2081   final char[] optimizedCurrentTokenSource1() {
2082     //return always the same char[] build only once
2083
2084     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2085     char charOne = source[startPosition];
2086     switch (charOne) {
2087       case 'a' :
2088         return charArray_a;
2089       case 'b' :
2090         return charArray_b;
2091       case 'c' :
2092         return charArray_c;
2093       case 'd' :
2094         return charArray_d;
2095       case 'e' :
2096         return charArray_e;
2097       case 'f' :
2098         return charArray_f;
2099       case 'g' :
2100         return charArray_g;
2101       case 'h' :
2102         return charArray_h;
2103       case 'i' :
2104         return charArray_i;
2105       case 'j' :
2106         return charArray_j;
2107       case 'k' :
2108         return charArray_k;
2109       case 'l' :
2110         return charArray_l;
2111       case 'm' :
2112         return charArray_m;
2113       case 'n' :
2114         return charArray_n;
2115       case 'o' :
2116         return charArray_o;
2117       case 'p' :
2118         return charArray_p;
2119       case 'q' :
2120         return charArray_q;
2121       case 'r' :
2122         return charArray_r;
2123       case 's' :
2124         return charArray_s;
2125       case 't' :
2126         return charArray_t;
2127       case 'u' :
2128         return charArray_u;
2129       case 'v' :
2130         return charArray_v;
2131       case 'w' :
2132         return charArray_w;
2133       case 'x' :
2134         return charArray_x;
2135       case 'y' :
2136         return charArray_y;
2137       case 'z' :
2138         return charArray_z;
2139       default :
2140         return new char[] { charOne };
2141     }
2142   }
2143
2144   final char[] optimizedCurrentTokenSource2() {
2145     //try to return the same char[] build only once
2146
2147     char c0, c1;
2148     int hash =
2149       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2150         % TableSize;
2151     char[][] table = charArray_length[0][hash];
2152     int i = newEntry2;
2153     while (++i < InternalTableSize) {
2154       char[] charArray = table[i];
2155       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2156         return charArray;
2157     }
2158     //---------other side---------
2159     i = -1;
2160     int max = newEntry2;
2161     while (++i <= max) {
2162       char[] charArray = table[i];
2163       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2164         return charArray;
2165     }
2166     //--------add the entry-------
2167     if (++max >= InternalTableSize)
2168       max = 0;
2169     char[] r;
2170     table[max] = (r = new char[] { c0, c1 });
2171     newEntry2 = max;
2172     return r;
2173   }
2174
2175   final char[] optimizedCurrentTokenSource3() {
2176     //try to return the same char[] build only once
2177
2178     char c0, c1, c2;
2179     int hash =
2180       (((c0 = source[startPosition]) << 12)
2181         + ((c1 = source[startPosition + 1]) << 6)
2182         + (c2 = source[startPosition + 2]))
2183         % TableSize;
2184     char[][] table = charArray_length[1][hash];
2185     int i = newEntry3;
2186     while (++i < InternalTableSize) {
2187       char[] charArray = table[i];
2188       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2189         return charArray;
2190     }
2191     //---------other side---------
2192     i = -1;
2193     int max = newEntry3;
2194     while (++i <= max) {
2195       char[] charArray = table[i];
2196       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2197         return charArray;
2198     }
2199     //--------add the entry-------
2200     if (++max >= InternalTableSize)
2201       max = 0;
2202     char[] r;
2203     table[max] = (r = new char[] { c0, c1, c2 });
2204     newEntry3 = max;
2205     return r;
2206   }
2207
2208   final char[] optimizedCurrentTokenSource4() {
2209     //try to return the same char[] build only once
2210
2211     char c0, c1, c2, c3;
2212     long hash =
2213       ((((long) (c0 = source[startPosition])) << 18)
2214         + ((c1 = source[startPosition + 1]) << 12)
2215         + ((c2 = source[startPosition + 2]) << 6)
2216         + (c3 = source[startPosition + 3]))
2217         % TableSize;
2218     char[][] table = charArray_length[2][(int) hash];
2219     int i = newEntry4;
2220     while (++i < InternalTableSize) {
2221       char[] charArray = table[i];
2222       if ((c0 == charArray[0])
2223         && (c1 == charArray[1])
2224         && (c2 == charArray[2])
2225         && (c3 == charArray[3]))
2226         return charArray;
2227     }
2228     //---------other side---------
2229     i = -1;
2230     int max = newEntry4;
2231     while (++i <= max) {
2232       char[] charArray = table[i];
2233       if ((c0 == charArray[0])
2234         && (c1 == charArray[1])
2235         && (c2 == charArray[2])
2236         && (c3 == charArray[3]))
2237         return charArray;
2238     }
2239     //--------add the entry-------
2240     if (++max >= InternalTableSize)
2241       max = 0;
2242     char[] r;
2243     table[max] = (r = new char[] { c0, c1, c2, c3 });
2244     newEntry4 = max;
2245     return r;
2246
2247   }
2248
2249   final char[] optimizedCurrentTokenSource5() {
2250     //try to return the same char[] build only once
2251
2252     char c0, c1, c2, c3, c4;
2253     long hash =
2254       ((((long) (c0 = source[startPosition])) << 24)
2255         + (((long) (c1 = source[startPosition + 1])) << 18)
2256         + ((c2 = source[startPosition + 2]) << 12)
2257         + ((c3 = source[startPosition + 3]) << 6)
2258         + (c4 = source[startPosition + 4]))
2259         % TableSize;
2260     char[][] table = charArray_length[3][(int) hash];
2261     int i = newEntry5;
2262     while (++i < InternalTableSize) {
2263       char[] charArray = table[i];
2264       if ((c0 == charArray[0])
2265         && (c1 == charArray[1])
2266         && (c2 == charArray[2])
2267         && (c3 == charArray[3])
2268         && (c4 == charArray[4]))
2269         return charArray;
2270     }
2271     //---------other side---------
2272     i = -1;
2273     int max = newEntry5;
2274     while (++i <= max) {
2275       char[] charArray = table[i];
2276       if ((c0 == charArray[0])
2277         && (c1 == charArray[1])
2278         && (c2 == charArray[2])
2279         && (c3 == charArray[3])
2280         && (c4 == charArray[4]))
2281         return charArray;
2282     }
2283     //--------add the entry-------
2284     if (++max >= InternalTableSize)
2285       max = 0;
2286     char[] r;
2287     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2288     newEntry5 = max;
2289     return r;
2290
2291   }
2292
2293   final char[] optimizedCurrentTokenSource6() {
2294     //try to return the same char[] build only once
2295
2296     char c0, c1, c2, c3, c4, c5;
2297     long hash =
2298       ((((long) (c0 = source[startPosition])) << 32)
2299         + (((long) (c1 = source[startPosition + 1])) << 24)
2300         + (((long) (c2 = source[startPosition + 2])) << 18)
2301         + ((c3 = source[startPosition + 3]) << 12)
2302         + ((c4 = source[startPosition + 4]) << 6)
2303         + (c5 = source[startPosition + 5]))
2304         % TableSize;
2305     char[][] table = charArray_length[4][(int) hash];
2306     int i = newEntry6;
2307     while (++i < InternalTableSize) {
2308       char[] charArray = table[i];
2309       if ((c0 == charArray[0])
2310         && (c1 == charArray[1])
2311         && (c2 == charArray[2])
2312         && (c3 == charArray[3])
2313         && (c4 == charArray[4])
2314         && (c5 == charArray[5]))
2315         return charArray;
2316     }
2317     //---------other side---------
2318     i = -1;
2319     int max = newEntry6;
2320     while (++i <= max) {
2321       char[] charArray = table[i];
2322       if ((c0 == charArray[0])
2323         && (c1 == charArray[1])
2324         && (c2 == charArray[2])
2325         && (c3 == charArray[3])
2326         && (c4 == charArray[4])
2327         && (c5 == charArray[5]))
2328         return charArray;
2329     }
2330     //--------add the entry-------
2331     if (++max >= InternalTableSize)
2332       max = 0;
2333     char[] r;
2334     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2335     newEntry6 = max;
2336     return r;
2337   }
2338
2339   public final void pushLineSeparator() throws InvalidInputException {
2340     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2341     final int INCREMENT = 250;
2342
2343     if (this.checkNonExternalizedStringLiterals) {
2344       // reinitialize the current line for non externalize strings purpose
2345       currentLine = null;
2346     }
2347     //currentCharacter is at position currentPosition-1
2348
2349     // cr 000D
2350     if (currentCharacter == '\r') {
2351       int separatorPos = currentPosition - 1;
2352       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2353         return;
2354       //System.out.println("CR-" + separatorPos);
2355       try {
2356         lineEnds[++linePtr] = separatorPos;
2357       } catch (IndexOutOfBoundsException e) {
2358         //linePtr value is correct
2359         int oldLength = lineEnds.length;
2360         int[] old = lineEnds;
2361         lineEnds = new int[oldLength + INCREMENT];
2362         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2363         lineEnds[linePtr] = separatorPos;
2364       }
2365       // look-ahead for merged cr+lf
2366       try {
2367         if (source[currentPosition] == '\n') {
2368           //System.out.println("look-ahead LF-" + currentPosition);
2369           lineEnds[linePtr] = currentPosition;
2370           currentPosition++;
2371           wasAcr = false;
2372         } else {
2373           wasAcr = true;
2374         }
2375       } catch (IndexOutOfBoundsException e) {
2376         wasAcr = true;
2377       }
2378     } else {
2379       // lf 000A
2380       if (currentCharacter == '\n') {
2381         //must merge eventual cr followed by lf
2382         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2383           //System.out.println("merge LF-" + (currentPosition - 1));
2384           lineEnds[linePtr] = currentPosition - 1;
2385         } else {
2386           int separatorPos = currentPosition - 1;
2387           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2388             return;
2389           // System.out.println("LF-" + separatorPos);
2390           try {
2391             lineEnds[++linePtr] = separatorPos;
2392           } catch (IndexOutOfBoundsException e) {
2393             //linePtr value is correct
2394             int oldLength = lineEnds.length;
2395             int[] old = lineEnds;
2396             lineEnds = new int[oldLength + INCREMENT];
2397             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2398             lineEnds[linePtr] = separatorPos;
2399           }
2400         }
2401         wasAcr = false;
2402       }
2403     }
2404   }
2405   public final void pushUnicodeLineSeparator() {
2406     // isUnicode means that the \r or \n has been read as a unicode character
2407
2408     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2409
2410     final int INCREMENT = 250;
2411     //currentCharacter is at position currentPosition-1
2412
2413     if (this.checkNonExternalizedStringLiterals) {
2414       // reinitialize the current line for non externalize strings purpose
2415       currentLine = null;
2416     }
2417
2418     // cr 000D
2419     if (currentCharacter == '\r') {
2420       int separatorPos = currentPosition - 6;
2421       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2422         return;
2423       //System.out.println("CR-" + separatorPos);
2424       try {
2425         lineEnds[++linePtr] = separatorPos;
2426       } catch (IndexOutOfBoundsException e) {
2427         //linePtr value is correct
2428         int oldLength = lineEnds.length;
2429         int[] old = lineEnds;
2430         lineEnds = new int[oldLength + INCREMENT];
2431         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2432         lineEnds[linePtr] = separatorPos;
2433       }
2434       // look-ahead for merged cr+lf
2435       if (source[currentPosition] == '\n') {
2436         //System.out.println("look-ahead LF-" + currentPosition);
2437         lineEnds[linePtr] = currentPosition;
2438         currentPosition++;
2439         wasAcr = false;
2440       } else {
2441         wasAcr = true;
2442       }
2443     } else {
2444       // lf 000A
2445       if (currentCharacter == '\n') {
2446         //must merge eventual cr followed by lf
2447         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2448           //System.out.println("merge LF-" + (currentPosition - 1));
2449           lineEnds[linePtr] = currentPosition - 6;
2450         } else {
2451           int separatorPos = currentPosition - 6;
2452           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2453             return;
2454           // System.out.println("LF-" + separatorPos);
2455           try {
2456             lineEnds[++linePtr] = separatorPos;
2457           } catch (IndexOutOfBoundsException e) {
2458             //linePtr value is correct
2459             int oldLength = lineEnds.length;
2460             int[] old = lineEnds;
2461             lineEnds = new int[oldLength + INCREMENT];
2462             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2463             lineEnds[linePtr] = separatorPos;
2464           }
2465         }
2466         wasAcr = false;
2467       }
2468     }
2469   }
2470   public final void recordComment(boolean isJavadoc) {
2471
2472     // a new annotation comment is recorded
2473     try {
2474       commentStops[++commentPtr] =
2475         isJavadoc ? currentPosition : -currentPosition;
2476     } catch (IndexOutOfBoundsException e) {
2477       int oldStackLength = commentStops.length;
2478       int[] oldStack = commentStops;
2479       commentStops = new int[oldStackLength + 30];
2480       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2481       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2482       //grows the positions buffers too
2483       int[] old = commentStarts;
2484       commentStarts = new int[oldStackLength + 30];
2485       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2486     }
2487
2488     //the buffer is of a correct size here
2489     commentStarts[commentPtr] = startPosition;
2490   }
2491   public void resetTo(int begin, int end) {
2492     //reset the scanner to a given position where it may rescan again
2493
2494     diet = false;
2495     initialPosition = startPosition = currentPosition = begin;
2496     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2497     commentPtr = -1; // reset comment stack
2498   }
2499
2500   public final void scanSingleQuotedEscapeCharacter()
2501     throws InvalidInputException {
2502     // the string with "\\u" is a legal string of two chars \ and u
2503     //thus we use a direct access to the source (for regular cases).
2504
2505 //    if (unicodeAsBackSlash) {
2506 //      // consume next character
2507 //      unicodeAsBackSlash = false;
2508 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2509 //        && (source[currentPosition] == 'u')) {
2510 //        getNextUnicodeChar();
2511 //      } else {
2512 //        if (withoutUnicodePtr != 0) {
2513 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2514 //        }
2515 //      }
2516 //    } else
2517       currentCharacter = source[currentPosition++];
2518     switch (currentCharacter) {
2519       case '\'' :
2520         currentCharacter = '\'';
2521         break;
2522       case '\\' :
2523         currentCharacter = '\\';
2524         break;
2525       default :
2526         currentCharacter = '\\';
2527         currentPosition--;
2528     }
2529   }
2530
2531   public final void scanDoubleQuotedEscapeCharacter()
2532     throws InvalidInputException {
2533     // the string with "\\u" is a legal string of two chars \ and u
2534     //thus we use a direct access to the source (for regular cases).
2535
2536 //    if (unicodeAsBackSlash) {
2537 //      // consume next character
2538 //      unicodeAsBackSlash = false;
2539 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2540 //        && (source[currentPosition] == 'u')) {
2541 //        getNextUnicodeChar();
2542 //      } else {
2543 //        if (withoutUnicodePtr != 0) {
2544 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2545 //        }
2546 //      }
2547 //    } else
2548       currentCharacter = source[currentPosition++];
2549     switch (currentCharacter) {
2550       //      case 'b' :
2551       //        currentCharacter = '\b';
2552       //        break;
2553       case 't' :
2554         currentCharacter = '\t';
2555         break;
2556       case 'n' :
2557         currentCharacter = '\n';
2558         break;
2559         //      case 'f' :
2560         //        currentCharacter = '\f';
2561         //        break;
2562       case 'r' :
2563         currentCharacter = '\r';
2564         break;
2565       case '\"' :
2566         currentCharacter = '\"';
2567         break;
2568       case '\'' :
2569         currentCharacter = '\'';
2570         break;
2571       case '\\' :
2572         currentCharacter = '\\';
2573         break;
2574       case '$' :
2575         currentCharacter = '$';
2576         break;
2577       default :
2578         // -----------octal escape--------------
2579         // OctalDigit
2580         // OctalDigit OctalDigit
2581         // ZeroToThree OctalDigit OctalDigit
2582
2583         int number = Character.getNumericValue(currentCharacter);
2584         if (number >= 0 && number <= 7) {
2585           boolean zeroToThreeNot = number > 3;
2586           if (Character
2587             .isDigit(currentCharacter = source[currentPosition++])) {
2588             int digit = Character.getNumericValue(currentCharacter);
2589             if (digit >= 0 && digit <= 7) {
2590               number = (number * 8) + digit;
2591               if (Character
2592                 .isDigit(currentCharacter = source[currentPosition++])) {
2593                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2594                   currentPosition--;
2595                 } else {
2596                   digit = Character.getNumericValue(currentCharacter);
2597                   if (digit >= 0 && digit <= 7) {
2598                     // has read \ZeroToThree OctalDigit OctalDigit
2599                     number = (number * 8) + digit;
2600                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2601                     currentPosition--;
2602                   }
2603                 }
2604               } else { // has read \OctalDigit NonDigit--> ignore last character
2605                 currentPosition--;
2606               }
2607             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2608               currentPosition--;
2609             }
2610           } else { // has read \OctalDigit --> ignore last character
2611             currentPosition--;
2612           }
2613           if (number > 255)
2614             throw new InvalidInputException(INVALID_ESCAPE);
2615           currentCharacter = (char) number;
2616         }
2617         //else
2618         //     throw new InvalidInputException(INVALID_ESCAPE);
2619     }
2620   }
2621
2622   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2623   //    return scanIdentifierOrKeyword( false );
2624   //  }
2625
2626   public int scanIdentifierOrKeyword(boolean isVariable)
2627     throws InvalidInputException {
2628     //test keywords
2629
2630     //first dispatch on the first char.
2631     //then the length. If there are several
2632     //keywors with the same length AND the same first char, then do another
2633     //disptach on the second char :-)...cool....but fast !
2634
2635     useAssertAsAnIndentifier = false;
2636
2637     while (getNextCharAsJavaIdentifierPart()) {
2638     };
2639
2640     if (isVariable) {
2641       return TokenNameVariable;
2642     }
2643     int index, length;
2644     char[] data;
2645     char firstLetter;
2646 //    if (withoutUnicodePtr == 0)
2647
2648       //quick test on length == 1 but not on length > 12 while most identifier
2649       //have a length which is <= 12...but there are lots of identifier with
2650       //only one char....
2651
2652 //      {
2653       if ((length = currentPosition - startPosition) == 1)
2654         return TokenNameIdentifier;
2655       //  data = source;
2656       data = new char[length];
2657       index = startPosition;
2658       for (int i = 0; i < length; i++) {
2659         data[i] = Character.toLowerCase(source[index + i]);
2660       }
2661       index = 0;
2662 //    } else {
2663 //      if ((length = withoutUnicodePtr) == 1)
2664 //        return TokenNameIdentifier;
2665 //      // data = withoutUnicodeBuffer;
2666 //      data = new char[withoutUnicodeBuffer.length];
2667 //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2668 //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2669 //      }
2670 //      index = 1;
2671 //    }
2672
2673     firstLetter = data[index];
2674     switch (firstLetter) {
2675
2676       case 'a' : // as and array
2677         switch (length) {
2678           case 2 : //as
2679             if ((data[++index] == 's')) {
2680               return TokenNameas;
2681             } else {
2682               return TokenNameIdentifier;
2683             }
2684           case 3 : //and
2685             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2686               return TokenNameAND;
2687             } else {
2688               return TokenNameIdentifier;
2689             }
2690             //          case 5 :
2691             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2692             //              return TokenNamearray;
2693             //            else
2694             //              return TokenNameIdentifier;
2695           default :
2696             return TokenNameIdentifier;
2697         }
2698       case 'b' : //break
2699         switch (length) {
2700           case 5 :
2701             if ((data[++index] == 'r')
2702               && (data[++index] == 'e')
2703               && (data[++index] == 'a')
2704               && (data[++index] == 'k'))
2705               return TokenNamebreak;
2706             else
2707               return TokenNameIdentifier;
2708           default :
2709             return TokenNameIdentifier;
2710         }
2711
2712       case 'c' : //case class continue
2713         switch (length) {
2714           case 4 :
2715             if ((data[++index] == 'a')
2716               && (data[++index] == 's')
2717               && (data[++index] == 'e'))
2718               return TokenNamecase;
2719             else
2720               return TokenNameIdentifier;
2721           case 5 :
2722             if ((data[++index] == 'l')
2723               && (data[++index] == 'a')
2724               && (data[++index] == 's')
2725               && (data[++index] == 's'))
2726               return TokenNameclass;
2727             else
2728               return TokenNameIdentifier;
2729           case 8 :
2730             if ((data[++index] == 'o')
2731               && (data[++index] == 'n')
2732               && (data[++index] == 't')
2733               && (data[++index] == 'i')
2734               && (data[++index] == 'n')
2735               && (data[++index] == 'u')
2736               && (data[++index] == 'e'))
2737               return TokenNamecontinue;
2738             else
2739               return TokenNameIdentifier;
2740           default :
2741             return TokenNameIdentifier;
2742         }
2743
2744       case 'd' : //define default do
2745         switch (length) {
2746           case 2 :
2747             if ((data[++index] == 'o'))
2748               return TokenNamedo;
2749             else
2750               return TokenNameIdentifier;
2751           case 6 :
2752             if ((data[++index] == 'e')
2753               && (data[++index] == 'f')
2754               && (data[++index] == 'i')
2755               && (data[++index] == 'n')
2756               && (data[++index] == 'e'))
2757               return TokenNamedefine;
2758             else
2759               return TokenNameIdentifier;
2760           case 7 :
2761             if ((data[++index] == 'e')
2762               && (data[++index] == 'f')
2763               && (data[++index] == 'a')
2764               && (data[++index] == 'u')
2765               && (data[++index] == 'l')
2766               && (data[++index] == 't'))
2767               return TokenNamedefault;
2768             else
2769               return TokenNameIdentifier;
2770           default :
2771             return TokenNameIdentifier;
2772         }
2773       case 'e' : //echo else elseif extends
2774         switch (length) {
2775           case 4 :
2776             if ((data[++index] == 'c')
2777               && (data[++index] == 'h')
2778               && (data[++index] == 'o'))
2779               return TokenNameecho;
2780             else if (
2781               (data[index] == 'l')
2782                 && (data[++index] == 's')
2783                 && (data[++index] == 'e'))
2784               return TokenNameelse;
2785             else
2786               return TokenNameIdentifier;
2787           case 5 : // endif
2788             if ((data[++index] == 'n')
2789               && (data[++index] == 'd')
2790               && (data[++index] == 'i')
2791               && (data[++index] == 'f'))
2792               return TokenNameendif;
2793             else
2794               return TokenNameIdentifier;
2795           case 6 : // endfor
2796             if ((data[++index] == 'n')
2797               && (data[++index] == 'd')
2798               && (data[++index] == 'f')
2799               && (data[++index] == 'o')
2800               && (data[++index] == 'r'))
2801               return TokenNameendfor;
2802             else if (
2803               (data[index] == 'l')
2804                 && (data[++index] == 's')
2805                 && (data[++index] == 'e')
2806                 && (data[++index] == 'i')
2807                 && (data[++index] == 'f'))
2808               return TokenNameelseif;
2809             else
2810               return TokenNameIdentifier;
2811           case 7 :
2812             if ((data[++index] == 'x')
2813               && (data[++index] == 't')
2814               && (data[++index] == 'e')
2815               && (data[++index] == 'n')
2816               && (data[++index] == 'd')
2817               && (data[++index] == 's'))
2818               return TokenNameextends;
2819             else
2820               return TokenNameIdentifier;
2821           case 8 : // endwhile
2822             if ((data[++index] == 'n')
2823               && (data[++index] == 'd')
2824               && (data[++index] == 'w')
2825               && (data[++index] == 'h')
2826               && (data[++index] == 'i')
2827               && (data[++index] == 'l')
2828               && (data[++index] == 'e'))
2829               return TokenNameendwhile;
2830             else
2831               return TokenNameIdentifier;
2832           case 9 : // endswitch
2833             if ((data[++index] == 'n')
2834               && (data[++index] == 'd')
2835               && (data[++index] == 's')
2836               && (data[++index] == 'w')
2837               && (data[++index] == 'i')
2838               && (data[++index] == 't')
2839               && (data[++index] == 'c')
2840               && (data[++index] == 'h'))
2841               return TokenNameendswitch;
2842             else
2843               return TokenNameIdentifier;
2844           case 10 : // endforeach
2845             if ((data[++index] == 'n')
2846               && (data[++index] == 'd')
2847               && (data[++index] == 'f')
2848               && (data[++index] == 'o')
2849               && (data[++index] == 'r')
2850               && (data[++index] == 'e')
2851               && (data[++index] == 'a')
2852               && (data[++index] == 'c')
2853               && (data[++index] == 'h'))
2854               return TokenNameendforeach;
2855             else
2856               return TokenNameIdentifier;
2857
2858           default :
2859             return TokenNameIdentifier;
2860         }
2861
2862       case 'f' : //for false function
2863         switch (length) {
2864           case 3 :
2865             if ((data[++index] == 'o') && (data[++index] == 'r'))
2866               return TokenNamefor;
2867             else
2868               return TokenNameIdentifier;
2869           case 5 :
2870             if ((data[++index] == 'a')
2871               && (data[++index] == 'l')
2872               && (data[++index] == 's')
2873               && (data[++index] == 'e'))
2874               return TokenNamefalse;
2875             else
2876               return TokenNameIdentifier;
2877           case 7 : // function
2878             if ((data[++index] == 'o')
2879               && (data[++index] == 'r')
2880               && (data[++index] == 'e')
2881               && (data[++index] == 'a')
2882               && (data[++index] == 'c')
2883               && (data[++index] == 'h'))
2884               return TokenNameforeach;
2885             else
2886               return TokenNameIdentifier;
2887           case 8 : // function
2888             if ((data[++index] == 'u')
2889               && (data[++index] == 'n')
2890               && (data[++index] == 'c')
2891               && (data[++index] == 't')
2892               && (data[++index] == 'i')
2893               && (data[++index] == 'o')
2894               && (data[++index] == 'n'))
2895               return TokenNamefunction;
2896             else
2897               return TokenNameIdentifier;
2898           default :
2899             return TokenNameIdentifier;
2900         }
2901       case 'g' : //global
2902         if (length == 6) {
2903           if ((data[++index] == 'l')
2904             && (data[++index] == 'o')
2905             && (data[++index] == 'b')
2906             && (data[++index] == 'a')
2907             && (data[++index] == 'l')) {
2908             return TokenNameglobal;
2909           }
2910         }
2911         return TokenNameIdentifier;
2912
2913       case 'i' : //if int
2914         switch (length) {
2915           case 2 :
2916             if (data[++index] == 'f')
2917               return TokenNameif;
2918             else
2919               return TokenNameIdentifier;
2920             //          case 3 :
2921             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2922             //              return TokenNameint;
2923             //            else
2924             //              return TokenNameIdentifier;
2925           case 7 :
2926             if ((data[++index] == 'n')
2927               && (data[++index] == 'c')
2928               && (data[++index] == 'l')
2929               && (data[++index] == 'u')
2930               && (data[++index] == 'd')
2931               && (data[++index] == 'e'))
2932               return TokenNameinclude;
2933             else
2934               return TokenNameIdentifier;
2935           case 12 :
2936             if ((data[++index] == 'n')
2937               && (data[++index] == 'c')
2938               && (data[++index] == 'l')
2939               && (data[++index] == 'u')
2940               && (data[++index] == 'd')
2941               && (data[++index] == 'e')
2942               && (data[++index] == '_')
2943               && (data[++index] == 'o')
2944               && (data[++index] == 'n')
2945               && (data[++index] == 'c')
2946               && (data[++index] == 'e'))
2947               return TokenNameinclude_once;
2948             else
2949               return TokenNameIdentifier;
2950           default :
2951             return TokenNameIdentifier;
2952         }
2953
2954       case 'l' : //list
2955         if (length == 4) {
2956           if ((data[++index] == 'i')
2957             && (data[++index] == 's')
2958             && (data[++index] == 't')) {
2959             return TokenNamelist;
2960           }
2961         }
2962         return TokenNameIdentifier;
2963
2964       case 'n' : // new null
2965         switch (length) {
2966           case 3 :
2967             if ((data[++index] == 'e') && (data[++index] == 'w'))
2968               return TokenNamenew;
2969             else
2970               return TokenNameIdentifier;
2971           case 4 :
2972             if ((data[++index] == 'u')
2973               && (data[++index] == 'l')
2974               && (data[++index] == 'l'))
2975               return TokenNamenull;
2976             else
2977               return TokenNameIdentifier;
2978
2979           default :
2980             return TokenNameIdentifier;
2981         }
2982       case 'o' : // or old_function
2983         if (length == 2) {
2984           if (data[++index] == 'r') {
2985             return TokenNameOR;
2986           }
2987         }
2988         //        if (length == 12) {
2989         //          if ((data[++index] == 'l')
2990         //            && (data[++index] == 'd')
2991         //            && (data[++index] == '_')
2992         //            && (data[++index] == 'f')
2993         //            && (data[++index] == 'u')
2994         //            && (data[++index] == 'n')
2995         //            && (data[++index] == 'c')
2996         //            && (data[++index] == 't')
2997         //            && (data[++index] == 'i')
2998         //            && (data[++index] == 'o')
2999         //            && (data[++index] == 'n')) {
3000         //            return TokenNameold_function;
3001         //          }
3002         //        }
3003         return TokenNameIdentifier;
3004
3005       case 'p' : // print
3006         if (length == 5) {
3007           if ((data[++index] == 'r')
3008             && (data[++index] == 'i')
3009             && (data[++index] == 'n')
3010             && (data[++index] == 't')) {
3011             return TokenNameprint;
3012           }
3013         }
3014         return TokenNameIdentifier;
3015       case 'r' : //return require require_once
3016         if (length == 6) {
3017           if ((data[++index] == 'e')
3018             && (data[++index] == 't')
3019             && (data[++index] == 'u')
3020             && (data[++index] == 'r')
3021             && (data[++index] == 'n')) {
3022             return TokenNamereturn;
3023           }
3024         } else if (length == 7) {
3025           if ((data[++index] == 'e')
3026             && (data[++index] == 'q')
3027             && (data[++index] == 'u')
3028             && (data[++index] == 'i')
3029             && (data[++index] == 'r')
3030             && (data[++index] == 'e')) {
3031             return TokenNamerequire;
3032           }
3033         } else if (length == 12) {
3034           if ((data[++index] == 'e')
3035             && (data[++index] == 'q')
3036             && (data[++index] == 'u')
3037             && (data[++index] == 'i')
3038             && (data[++index] == 'r')
3039             && (data[++index] == 'e')
3040             && (data[++index] == '_')
3041             && (data[++index] == 'o')
3042             && (data[++index] == 'n')
3043             && (data[++index] == 'c')
3044             && (data[++index] == 'e')) {
3045             return TokenNamerequire_once;
3046           }
3047         } else
3048           return TokenNameIdentifier;
3049
3050       case 's' : //static switch
3051         switch (length) {
3052           case 6 :
3053             if (data[++index] == 't')
3054               if ((data[++index] == 'a')
3055                 && (data[++index] == 't')
3056                 && (data[++index] == 'i')
3057                 && (data[++index] == 'c')) {
3058                 return TokenNamestatic;
3059               } else
3060                 return TokenNameIdentifier;
3061             else if (
3062               (data[index] == 'w')
3063                 && (data[++index] == 'i')
3064                 && (data[++index] == 't')
3065                 && (data[++index] == 'c')
3066                 && (data[++index] == 'h'))
3067               return TokenNameswitch;
3068             else
3069               return TokenNameIdentifier;
3070           default :
3071             return TokenNameIdentifier;
3072         }
3073
3074       case 't' : // true
3075         switch (length) {
3076
3077           case 4 :
3078             if ((data[++index] == 'r')
3079               && (data[++index] == 'u')
3080               && (data[++index] == 'e'))
3081               return TokenNametrue;
3082             else
3083               return TokenNameIdentifier;
3084             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3085             //              return TokenNamethis;
3086
3087           default :
3088             return TokenNameIdentifier;
3089         }
3090
3091       case 'v' : //var
3092         switch (length) {
3093           case 3 :
3094             if ((data[++index] == 'a') && (data[++index] == 'r'))
3095               return TokenNamevar;
3096             else
3097               return TokenNameIdentifier;
3098
3099           default :
3100             return TokenNameIdentifier;
3101         }
3102
3103       case 'w' : //while
3104         switch (length) {
3105           case 5 :
3106             if ((data[++index] == 'h')
3107               && (data[++index] == 'i')
3108               && (data[++index] == 'l')
3109               && (data[++index] == 'e'))
3110               return TokenNamewhile;
3111             else
3112               return TokenNameIdentifier;
3113             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3114             //return TokenNamewidefp ;
3115             //else
3116             //return TokenNameIdentifier;
3117           default :
3118             return TokenNameIdentifier;
3119         }
3120
3121       case 'x' : //xor
3122         switch (length) {
3123           case 3 :
3124             if ((data[++index] == 'o') && (data[++index] == 'r'))
3125               return TokenNameXOR;
3126             else
3127               return TokenNameIdentifier;
3128
3129           default :
3130             return TokenNameIdentifier;
3131         }
3132       default :
3133         return TokenNameIdentifier;
3134     }
3135   }
3136   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3137
3138     //when entering this method the currentCharacter is the firt
3139     //digit of the number , i.e. it may be preceeded by a . when
3140     //dotPrefix is true
3141
3142     boolean floating = dotPrefix;
3143     if ((!dotPrefix) && (currentCharacter == '0')) {
3144       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3145         //force the first char of the hexa number do exist...
3146         // consume next character
3147         unicodeAsBackSlash = false;
3148         currentCharacter = source[currentPosition++];
3149 //        if (((currentCharacter = source[currentPosition++]) == '\\')
3150 //          && (source[currentPosition] == 'u')) {
3151 //          getNextUnicodeChar();
3152 //        } else {
3153 //          if (withoutUnicodePtr != 0) {
3154 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3155 //          }
3156 //        }
3157         if (Character.digit(currentCharacter, 16) == -1)
3158           throw new InvalidInputException(INVALID_HEXA);
3159         //---end forcing--
3160         while (getNextCharAsDigit(16)) {
3161         };
3162         //        if (getNextChar('l', 'L') >= 0)
3163         //          return TokenNameLongLiteral;
3164         //        else
3165         return TokenNameIntegerLiteral;
3166       }
3167
3168       //there is x or X in the number
3169       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3170       if (getNextCharAsDigit()) {
3171         //-------------potential octal-----------------
3172         while (getNextCharAsDigit()) {
3173         };
3174
3175         //        if (getNextChar('l', 'L') >= 0) {
3176         //          return TokenNameLongLiteral;
3177         //        }
3178         //
3179         //        if (getNextChar('f', 'F') >= 0) {
3180         //          return TokenNameFloatingPointLiteral;
3181         //        }
3182
3183         if (getNextChar('d', 'D') >= 0) {
3184           return TokenNameDoubleLiteral;
3185         } else { //make the distinction between octal and float ....
3186           if (getNextChar('.')) { //bingo ! ....
3187             while (getNextCharAsDigit()) {
3188             };
3189             if (getNextChar('e', 'E') >= 0) {
3190               // consume next character
3191               unicodeAsBackSlash = false;
3192               currentCharacter = source[currentPosition++];
3193 //              if (((currentCharacter = source[currentPosition++]) == '\\')
3194 //                && (source[currentPosition] == 'u')) {
3195 //                getNextUnicodeChar();
3196 //              } else {
3197 //                if (withoutUnicodePtr != 0) {
3198 //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3199 //                }
3200 //              }
3201
3202               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3203                 // consume next character
3204                 unicodeAsBackSlash = false;
3205                 currentCharacter = source[currentPosition++];
3206 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3207 //                  && (source[currentPosition] == 'u')) {
3208 //                  getNextUnicodeChar();
3209 //                } else {
3210 //                  if (withoutUnicodePtr != 0) {
3211 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3212 //                      currentCharacter;
3213 //                  }
3214 //                }
3215               }
3216               if (!Character.isDigit(currentCharacter))
3217                 throw new InvalidInputException(INVALID_FLOAT);
3218               while (getNextCharAsDigit()) {
3219               };
3220             }
3221             //            if (getNextChar('f', 'F') >= 0)
3222             //              return TokenNameFloatingPointLiteral;
3223             getNextChar('d', 'D'); //jump over potential d or D
3224             return TokenNameDoubleLiteral;
3225           } else {
3226             return TokenNameIntegerLiteral;
3227           }
3228         }
3229       } else {
3230         /* carry on */
3231       }
3232     }
3233
3234     while (getNextCharAsDigit()) {
3235     };
3236
3237     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3238     //      return TokenNameLongLiteral;
3239
3240     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3241       while (getNextCharAsDigit()) {
3242       };
3243       floating = true;
3244     }
3245
3246     //if floating is true both exponant and suffix may be optional
3247
3248     if (getNextChar('e', 'E') >= 0) {
3249       floating = true;
3250       // consume next character
3251       unicodeAsBackSlash = false;
3252       currentCharacter = source[currentPosition++];
3253 //      if (((currentCharacter = source[currentPosition++]) == '\\')
3254 //        && (source[currentPosition] == 'u')) {
3255 //        getNextUnicodeChar();
3256 //      } else {
3257 //        if (withoutUnicodePtr != 0) {
3258 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3259 //        }
3260 //      }
3261
3262       if ((currentCharacter == '-')
3263         || (currentCharacter == '+')) { // consume next character
3264         unicodeAsBackSlash = false;
3265         currentCharacter = source[currentPosition++];
3266 //        if (((currentCharacter = source[currentPosition++]) == '\\')
3267 //          && (source[currentPosition] == 'u')) {
3268 //          getNextUnicodeChar();
3269 //        } else {
3270 //          if (withoutUnicodePtr != 0) {
3271 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3272 //          }
3273 //        }
3274       }
3275       if (!Character.isDigit(currentCharacter))
3276         throw new InvalidInputException(INVALID_FLOAT);
3277       while (getNextCharAsDigit()) {
3278       };
3279     }
3280
3281     if (getNextChar('d', 'D') >= 0)
3282       return TokenNameDoubleLiteral;
3283     //    if (getNextChar('f', 'F') >= 0)
3284     //      return TokenNameFloatingPointLiteral;
3285
3286     //the long flag has been tested before
3287
3288     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3289   }
3290   /**
3291    * Search the line number corresponding to a specific position
3292    *
3293    */
3294   public final int getLineNumber(int position) {
3295
3296     if (lineEnds == null)
3297       return 1;
3298     int length = linePtr + 1;
3299     if (length == 0)
3300       return 1;
3301     int g = 0, d = length - 1;
3302     int m = 0;
3303     while (g <= d) {
3304       m = (g + d) / 2;
3305       if (position < lineEnds[m]) {
3306         d = m - 1;
3307       } else if (position > lineEnds[m]) {
3308         g = m + 1;
3309       } else {
3310         return m + 1;
3311       }
3312     }
3313     if (position < lineEnds[m]) {
3314       return m + 1;
3315     }
3316     return m + 2;
3317   }
3318
3319   public void setPHPMode(boolean mode) {
3320     phpMode = mode;
3321   }
3322
3323   public final void setSource(char[] source) {
3324     //the source-buffer is set to sourceString
3325
3326     if (source == null) {
3327       this.source = new char[0];
3328     } else {
3329       this.source = source;
3330     }
3331     startPosition = -1;
3332     initialPosition = currentPosition = 0;
3333     containsAssertKeyword = false;
3334     withoutUnicodeBuffer = new char[this.source.length];
3335
3336   }
3337
3338   public String toString() {
3339     if (startPosition == source.length)
3340       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3341     if (currentPosition > source.length)
3342       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3343
3344     char front[] = new char[startPosition];
3345     System.arraycopy(source, 0, front, 0, startPosition);
3346
3347     int middleLength = (currentPosition - 1) - startPosition + 1;
3348     char middle[];
3349     if (middleLength > -1) {
3350       middle = new char[middleLength];
3351       System.arraycopy(source, startPosition, middle, 0, middleLength);
3352     } else {
3353       middle = new char[0];
3354     }
3355
3356     char end[] = new char[source.length - (currentPosition - 1)];
3357     System.arraycopy(
3358       source,
3359       (currentPosition - 1) + 1,
3360       end,
3361       0,
3362       source.length - (currentPosition - 1) - 1);
3363
3364     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3365     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3366     + new String(end);
3367   }
3368   public final String toStringAction(int act) {
3369     switch (act) {
3370       case TokenNameERROR :
3371         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3372       case TokenNameStopPHP :
3373         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3374       case TokenNameIdentifier :
3375         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3376       case TokenNameVariable :
3377         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3378       case TokenNameas :
3379         return "as"; //$NON-NLS-1$
3380       case TokenNamebreak :
3381         return "break"; //$NON-NLS-1$
3382       case TokenNamecase :
3383         return "case"; //$NON-NLS-1$
3384       case TokenNameclass :
3385         return "class"; //$NON-NLS-1$
3386       case TokenNamecontinue :
3387         return "continue"; //$NON-NLS-1$
3388       case TokenNamedefault :
3389         return "default"; //$NON-NLS-1$
3390       case TokenNamedefine :
3391         return "define"; //$NON-NLS-1$
3392       case TokenNamedo :
3393         return "do"; //$NON-NLS-1$
3394       case TokenNameecho :
3395         return "echo"; //$NON-NLS-1$
3396       case TokenNameelse :
3397         return "else"; //$NON-NLS-1$
3398       case TokenNameelseif :
3399         return "elseif"; //$NON-NLS-1$
3400       case TokenNameendfor :
3401         return "endfor"; //$NON-NLS-1$
3402       case TokenNameendforeach :
3403         return "endforeach"; //$NON-NLS-1$
3404       case TokenNameendif :
3405         return "endif"; //$NON-NLS-1$
3406       case TokenNameendswitch :
3407         return "endswitch"; //$NON-NLS-1$
3408       case TokenNameendwhile :
3409         return "endwhile"; //$NON-NLS-1$
3410       case TokenNameextends :
3411         return "extends"; //$NON-NLS-1$
3412       case TokenNamefalse :
3413         return "false"; //$NON-NLS-1$
3414       case TokenNamefor :
3415         return "for"; //$NON-NLS-1$
3416       case TokenNameforeach :
3417         return "foreach"; //$NON-NLS-1$
3418       case TokenNamefunction :
3419         return "function"; //$NON-NLS-1$
3420       case TokenNameglobal :
3421         return "global"; //$NON-NLS-1$
3422       case TokenNameif :
3423         return "if"; //$NON-NLS-1$
3424       case TokenNameinclude :
3425         return "include"; //$NON-NLS-1$
3426       case TokenNameinclude_once :
3427         return "include_once"; //$NON-NLS-1$
3428       case TokenNamelist :
3429         return "list"; //$NON-NLS-1$
3430       case TokenNamenew :
3431         return "new"; //$NON-NLS-1$
3432       case TokenNamenull :
3433         return "null"; //$NON-NLS-1$
3434       case TokenNameprint :
3435         return "print"; //$NON-NLS-1$
3436       case TokenNamerequire :
3437         return "require"; //$NON-NLS-1$
3438       case TokenNamerequire_once :
3439         return "require_once"; //$NON-NLS-1$
3440       case TokenNamereturn :
3441         return "return"; //$NON-NLS-1$
3442       case TokenNamestatic :
3443         return "static"; //$NON-NLS-1$
3444       case TokenNameswitch :
3445         return "switch"; //$NON-NLS-1$
3446       case TokenNametrue :
3447         return "true"; //$NON-NLS-1$
3448       case TokenNamevar :
3449         return "var"; //$NON-NLS-1$
3450       case TokenNamewhile :
3451         return "while"; //$NON-NLS-1$
3452       case TokenNameIntegerLiteral :
3453         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3454       case TokenNameDoubleLiteral :
3455         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3456       case TokenNameStringLiteral :
3457         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3458       case TokenNameStringConstant :
3459         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3460       case TokenNameStringInterpolated :
3461         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3462       case TokenNameHEREDOC :
3463         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3464
3465       case TokenNamePLUS_PLUS :
3466         return "++"; //$NON-NLS-1$
3467       case TokenNameMINUS_MINUS :
3468         return "--"; //$NON-NLS-1$
3469       case TokenNameEQUAL_EQUAL :
3470         return "=="; //$NON-NLS-1$
3471       case TokenNameEQUAL_GREATER :
3472         return "=>"; //$NON-NLS-1$
3473       case TokenNameLESS_EQUAL :
3474         return "<="; //$NON-NLS-1$
3475       case TokenNameGREATER_EQUAL :
3476         return ">="; //$NON-NLS-1$
3477       case TokenNameNOT_EQUAL :
3478         return "!="; //$NON-NLS-1$
3479       case TokenNameLEFT_SHIFT :
3480         return "<<"; //$NON-NLS-1$
3481       case TokenNameRIGHT_SHIFT :
3482         return ">>"; //$NON-NLS-1$
3483       case TokenNamePLUS_EQUAL :
3484         return "+="; //$NON-NLS-1$
3485       case TokenNameMINUS_EQUAL :
3486         return "-="; //$NON-NLS-1$
3487       case TokenNameMULTIPLY_EQUAL :
3488         return "*="; //$NON-NLS-1$
3489       case TokenNameDIVIDE_EQUAL :
3490         return "/="; //$NON-NLS-1$
3491       case TokenNameAND_EQUAL :
3492         return "&="; //$NON-NLS-1$
3493       case TokenNameOR_EQUAL :
3494         return "|="; //$NON-NLS-1$
3495       case TokenNameXOR_EQUAL :
3496         return "^="; //$NON-NLS-1$
3497       case TokenNameREMAINDER_EQUAL :
3498         return "%="; //$NON-NLS-1$
3499       case TokenNameLEFT_SHIFT_EQUAL :
3500         return "<<="; //$NON-NLS-1$
3501       case TokenNameRIGHT_SHIFT_EQUAL :
3502         return ">>="; //$NON-NLS-1$
3503       case TokenNameOR_OR :
3504         return "||"; //$NON-NLS-1$
3505       case TokenNameAND_AND :
3506         return "&&"; //$NON-NLS-1$
3507       case TokenNamePLUS :
3508         return "+"; //$NON-NLS-1$
3509       case TokenNameMINUS :
3510         return "-"; //$NON-NLS-1$
3511       case TokenNameMINUS_GREATER :
3512         return "->";
3513       case TokenNameNOT :
3514         return "!"; //$NON-NLS-1$
3515       case TokenNameREMAINDER :
3516         return "%"; //$NON-NLS-1$
3517       case TokenNameXOR :
3518         return "^"; //$NON-NLS-1$
3519       case TokenNameAND :
3520         return "&"; //$NON-NLS-1$
3521       case TokenNameMULTIPLY :
3522         return "*"; //$NON-NLS-1$
3523       case TokenNameOR :
3524         return "|"; //$NON-NLS-1$
3525       case TokenNameTWIDDLE :
3526         return "~"; //$NON-NLS-1$
3527       case TokenNameTWIDDLE_EQUAL :
3528         return "~="; //$NON-NLS-1$
3529       case TokenNameDIVIDE :
3530         return "/"; //$NON-NLS-1$
3531       case TokenNameGREATER :
3532         return ">"; //$NON-NLS-1$
3533       case TokenNameLESS :
3534         return "<"; //$NON-NLS-1$
3535       case TokenNameLPAREN :
3536         return "("; //$NON-NLS-1$
3537       case TokenNameRPAREN :
3538         return ")"; //$NON-NLS-1$
3539       case TokenNameLBRACE :
3540         return "{"; //$NON-NLS-1$
3541       case TokenNameRBRACE :
3542         return "}"; //$NON-NLS-1$
3543       case TokenNameLBRACKET :
3544         return "["; //$NON-NLS-1$
3545       case TokenNameRBRACKET :
3546         return "]"; //$NON-NLS-1$
3547       case TokenNameSEMICOLON :
3548         return ";"; //$NON-NLS-1$
3549       case TokenNameQUESTION :
3550         return "?"; //$NON-NLS-1$
3551       case TokenNameCOLON :
3552         return ":"; //$NON-NLS-1$
3553       case TokenNameCOMMA :
3554         return ","; //$NON-NLS-1$
3555       case TokenNameDOT :
3556         return "."; //$NON-NLS-1$
3557       case TokenNameEQUAL :
3558         return "="; //$NON-NLS-1$
3559       case TokenNameAT :
3560         return "@";
3561       case TokenNameDOLLAR_LBRACE :
3562         return "${";
3563       case TokenNameEOF :
3564         return "EOF"; //$NON-NLS-1$
3565       default :
3566         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3567     }
3568   }
3569
3570   public Scanner(
3571     boolean tokenizeComments,
3572     boolean tokenizeWhiteSpace,
3573     boolean checkNonExternalizedStringLiterals) {
3574     this(
3575       tokenizeComments,
3576       tokenizeWhiteSpace,
3577       checkNonExternalizedStringLiterals,
3578       false);
3579   }
3580
3581   public Scanner(
3582     boolean tokenizeComments,
3583     boolean tokenizeWhiteSpace,
3584     boolean checkNonExternalizedStringLiterals,
3585     boolean assertMode) {
3586     this.eofPosition = Integer.MAX_VALUE;
3587     this.tokenizeComments = tokenizeComments;
3588     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3589     this.checkNonExternalizedStringLiterals =
3590       checkNonExternalizedStringLiterals;
3591     this.assertMode = assertMode;
3592   }
3593
3594   private void checkNonExternalizeString() throws InvalidInputException {
3595     if (currentLine == null)
3596       return;
3597     parseTags(currentLine);
3598   }
3599
3600   private void parseTags(NLSLine line) throws InvalidInputException {
3601     String s = new String(getCurrentTokenSource());
3602     int pos = s.indexOf(TAG_PREFIX);
3603     int lineLength = line.size();
3604     while (pos != -1) {
3605       int start = pos + TAG_PREFIX_LENGTH;
3606       int end = s.indexOf(TAG_POSTFIX, start);
3607       String index = s.substring(start, end);
3608       int i = 0;
3609       try {
3610         i = Integer.parseInt(index) - 1;
3611         // Tags are one based not zero based.
3612       } catch (NumberFormatException e) {
3613         i = -1; // we don't want to consider this as a valid NLS tag
3614       }
3615       if (line.exists(i)) {
3616         line.set(i, null);
3617       }
3618       pos = s.indexOf(TAG_PREFIX, start);
3619     }
3620
3621     this.nonNLSStrings = new StringLiteral[lineLength];
3622     int nonNLSCounter = 0;
3623     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3624       StringLiteral literal = (StringLiteral) iterator.next();
3625       if (literal != null) {
3626         this.nonNLSStrings[nonNLSCounter++] = literal;
3627       }
3628     }
3629     if (nonNLSCounter == 0) {
3630       this.nonNLSStrings = null;
3631       currentLine = null;
3632       return;
3633     }
3634     this.wasNonExternalizedStringLiteral = true;
3635     if (nonNLSCounter != lineLength) {
3636       System.arraycopy(
3637         this.nonNLSStrings,
3638         0,
3639         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3640         0,
3641         nonNLSCounter);
3642     }
3643     currentLine = null;
3644   }
3645 }