net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  23   /*
   24    * APIs are: - getNextToken(), which returns the current type of the token (this value is not memorized by the scanner) -
   25    * getCurrentTokenSource(), which provides the token "REAL" source (aka all unicode have been transformed into a correct char) -
   26    * sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
  27    */
  28   // 1.4 feature
  29   private boolean assertMode;
  30
  31   public boolean useAssertAsAnIndentifier = false;
  32
  33   //flag indicating if processed source contains occurrences of keyword assert
  34   public boolean containsAssertKeyword = false;
  35
  36   public boolean recordLineSeparator;
  37
  38   public boolean ignorePHPOneLiner = false;
  39
  40   public boolean phpMode = false;
  41
  42   public Stack encapsedStringStack = null;
  43
  44   public char currentCharacter;
  45
  46   public int startPosition;
  47
  48   public int currentPosition;
  49
  50   public int initialPosition, eofPosition;
  51
  52   // after this position eof are generated instead of real token from the
  53   // source
  54   public boolean tokenizeComments;
  55
  56   public boolean tokenizeWhiteSpace;
  57
  58   public boolean tokenizeStrings;
  59
   60   //source should be viewed as a window (aka a part)
   61   //of an entire very large stream
  62   public char source[];
  63
  64   //unicode support
  65   public char[] withoutUnicodeBuffer;
  66
  67   public int withoutUnicodePtr;
  68
  69   //when == 0 ==> no unicode in the current token
  70   public boolean unicodeAsBackSlash = false;
  71
  72   public boolean scanningFloatLiteral = false;
  73
  74   //support for /** comments
  75   public int[] commentStops = new int[10];
  76
  77   public int[] commentStarts = new int[10];
  78
  79   public int commentPtr = -1; // no comment test with commentPtr value -1
  80
  81   protected int lastCommentLinePosition = -1;
  82
  83   //diet parsing support - jump over some method body when requested
  84   public boolean diet = false;
  85
  86   //support for the poor-line-debuggers ....
  87   //remember the position of the cr/lf
  88   public int[] lineEnds = new int[250];
  89
  90   public int linePtr = -1;
  91
  92   public boolean wasAcr = false;
  93
  94   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  95
  96   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  97
  98   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  99
 100   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 101
 102   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 103
 104   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 105
 106   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 107
 108   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 109
 110   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 111
 112   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 113
 114   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 115
 116   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 117
  118   //----------------optimized identifier management------------------
 119   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 120       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 121       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 122       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 123       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 124       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 125       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 126       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 127       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 128
 129   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 130
 131   static final int TableSize = 30, InternalTableSize = 6;
 132
 133   //30*6 = 180 entries
 134   public static final int OptimizedLength = 6;
 135
 136   public/* static */
 137   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 138
 139   // support for detecting non-externalized string literals
 140   int currentLineNr = -1;
 141
 142   int previousLineNr = -1;
 143
 144   NLSLine currentLine = null;
 145
 146   List lines = new ArrayList();
 147
 148   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 149
 150   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 151
 152   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 153
 154   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 155
 156   public StringLiteral[] nonNLSStrings = null;
 157
 158   public boolean checkNonExternalizedStringLiterals = true;
 159
 160   public boolean wasNonExternalizedStringLiteral = false;
 161   /* static */{
 162     for (int i = 0; i < 6; i++) {
 163       for (int j = 0; j < TableSize; j++) {
 164         for (int k = 0; k < InternalTableSize; k++) {
 165           charArray_length[i][j][k] = initCharArray;
 166         }
 167       }
 168     }
 169   }
 170
 171   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 172
 173   public static final int RoundBracket = 0;
 174
 175   public static final int SquareBracket = 1;
 176
 177   public static final int CurlyBracket = 2;
 178
 179   public static final int BracketKinds = 3;
 180
 181   // task tag support
 182   public char[][] foundTaskTags = null;
 183
 184   public char[][] foundTaskMessages;
 185
 186   public char[][] foundTaskPriorities = null;
 187
 188   public int[][] foundTaskPositions;
 189
 190   public int foundTaskCount = 0;
 191
 192   public char[][] taskTags = null;
 193
 194   public char[][] taskPriorities = null;
 195   public boolean isTaskCaseSensitive = true;
 196   public static final boolean DEBUG = false;
 197
 198   public static final boolean TRACE = false;
 199
 200   public ICompilationUnit compilationUnit = null;
 201   /**
 202    * Determines if the specified character is permissible
 203    * as the first character in a PHP identifier.
 204    *
 205    * The '$' character for HP variables isn't regarded as the first character !
 206    */
 207   public static boolean isPHPIdentifierStart(char ch) {
 208     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 209   }
 210
 211   /**
 212    * Determines if the specified character may be part of a PHP
 213    * identifier as other than the first character
 214    */
 215   public static boolean isPHPIdentifierPart(char ch) {
 216     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 217   }
 218
 219   public final boolean atEnd() {
 220     // This code is not relevant if source is
 221     // Only a part of the real stream input
 222     return source.length == currentPosition;
 223   }
 224
 225   public char[] getCurrentIdentifierSource() {
 226     //return the token REAL source (aka unicodes are precomputed)
 227     char[] result;
 228     //    if (withoutUnicodePtr != 0)
 229     //      //0 is used as a fast test flag so the real first char is in position 1
 230     //      System.arraycopy(
 231     //        withoutUnicodeBuffer,
 232     //        1,
 233     //        result = new char[withoutUnicodePtr],
 234     //        0,
 235     //        withoutUnicodePtr);
 236     //    else {
 237     int length = currentPosition - startPosition;
 238     switch (length) { // see OptimizedLength
 239     case 1:
 240       return optimizedCurrentTokenSource1();
 241     case 2:
 242       return optimizedCurrentTokenSource2();
 243     case 3:
 244       return optimizedCurrentTokenSource3();
 245     case 4:
 246       return optimizedCurrentTokenSource4();
 247     case 5:
 248       return optimizedCurrentTokenSource5();
 249     case 6:
 250       return optimizedCurrentTokenSource6();
 251     }
 252     //no optimization
 253     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 254     //   }
 255     return result;
 256   }
 257
 258   public int getCurrentTokenEndPosition() {
 259     return this.currentPosition - 1;
 260   }
 261
 262   public final char[] getCurrentTokenSource() {
 263     // Return the token REAL source (aka unicodes are precomputed)
 264     char[] result;
 265     //    if (withoutUnicodePtr != 0)
 266     //      // 0 is used as a fast test flag so the real first char is in position 1
 267     //      System.arraycopy(
 268     //        withoutUnicodeBuffer,
 269     //        1,
 270     //        result = new char[withoutUnicodePtr],
 271     //        0,
 272     //        withoutUnicodePtr);
 273     //    else {
 274     int length;
 275     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 276     //    }
 277     return result;
 278   }
 279
 280   public final char[] getCurrentTokenSource(int startPos) {
 281     // Return the token REAL source (aka unicodes are precomputed)
 282     char[] result;
 283     //    if (withoutUnicodePtr != 0)
 284     //      // 0 is used as a fast test flag so the real first char is in position 1
 285     //      System.arraycopy(
 286     //        withoutUnicodeBuffer,
 287     //        1,
 288     //        result = new char[withoutUnicodePtr],
 289     //        0,
 290     //        withoutUnicodePtr);
 291     //    else {
 292     int length;
 293     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 294     //  }
 295     return result;
 296   }
 297
 298   public final char[] getCurrentTokenSourceString() {
 299     //return the token REAL source (aka unicodes are precomputed).
 300     //REMOVE the two " that are at the beginning and the end.
 301     char[] result;
 302     if (withoutUnicodePtr != 0)
 303       //0 is used as a fast test flag so the real first char is in position 1
 304       System.arraycopy(withoutUnicodeBuffer, 2,
 305       //2 is 1 (real start) + 1 (to jump over the ")
 306           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 307     else {
 308       int length;
 309       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 310     }
 311     return result;
 312   }
 313
 314   public int getCurrentTokenStartPosition() {
 315     return this.startPosition;
 316   }
 317
 318   public final char[] getCurrentStringLiteralSource() {
 319     // Return the token REAL source (aka unicodes are precomputed)
 320     if (startPosition + 1 >= currentPosition) {
 321       return new char[0];
 322     }
 323     char[] result;
 324     int length;
 325     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 326     //    }
 327     return result;
 328   }
 329
 330   public final char[] getCurrentStringLiteralSource(int startPos) {
 331     // Return the token REAL source (aka unicodes are precomputed)
 332     char[] result;
 333     int length;
 334     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 335     //    }
 336     return result;
 337   }
 338   /*
 339    * Search the source position corresponding to the end of a given line number
 340    *
 341    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 342    *
 343    * In case the given line number is inconsistent, answers -1.
 344    */
 345   public final int getLineEnd(int lineNumber) {
 346     if (lineEnds == null)
 347       return -1;
 348     if (lineNumber >= lineEnds.length)
 349       return -1;
 350     if (lineNumber <= 0)
 351       return -1;
 352     if (lineNumber == lineEnds.length - 1)
 353       return eofPosition;
 354     return lineEnds[lineNumber - 1];
 355     // next line start one character behind the lineEnd of the previous line
 356   }
 357
 358   /**
 359    * Search the source position corresponding to the beginning of a given line number
 360    *
 361    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 362    *
 363    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 364    *
 365    * In case the given line number is inconsistent, answers -1.
 366    */
 367   public final int getLineStart(int lineNumber) {
 368     if (lineEnds == null)
 369       return -1;
 370     if (lineNumber >= lineEnds.length)
 371       return -1;
 372     if (lineNumber <= 0)
 373       return -1;
 374     if (lineNumber == 1)
 375       return initialPosition;
 376     return lineEnds[lineNumber - 2] + 1;
 377     // next line start one character behind the lineEnd of the previous line
 378   }
 379
 380   public final boolean getNextChar(char testedChar) {
 381     //BOOLEAN
 382     //handle the case of unicode.
 383     //when a unicode appears then we must use a buffer that holds char
 384     // internal values
 385     //At the end of this method currentCharacter holds the new visited char
 386     //and currentPosition points right next after it
 387     //Both previous lines are true if the currentCharacter is == to the
 388     // testedChar
 389     //On false, no side effect has occured.
 390     //ALL getNextChar.... ARE OPTIMIZED COPIES
 391     int temp = currentPosition;
 392     try {
 393       currentCharacter = source[currentPosition++];
 394       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 395       //        && (source[currentPosition] == 'u')) {
 396       //        //-------------unicode traitement ------------
 397       //        int c1, c2, c3, c4;
 398       //        int unicodeSize = 6;
 399       //        currentPosition++;
 400       //        while (source[currentPosition] == 'u') {
 401       //          currentPosition++;
 402       //          unicodeSize++;
 403       //        }
 404       //
 405       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 406       //          || c1 < 0)
 407       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 408       //            || c2 < 0)
 409       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 410       //            || c3 < 0)
 411       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 412       //            || c4 < 0)) {
 413       //          currentPosition = temp;
 414       //          return false;
 415       //        }
 416       //
 417       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 418       //        if (currentCharacter != testedChar) {
 419       //          currentPosition = temp;
 420       //          return false;
 421       //        }
 422       //        unicodeAsBackSlash = currentCharacter == '\\';
 423       //
 424       //        //need the unicode buffer
 425       //        if (withoutUnicodePtr == 0) {
 426       //          //buffer all the entries that have been left aside....
 427       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 428       //          System.arraycopy(
 429       //            source,
 430       //            startPosition,
 431       //            withoutUnicodeBuffer,
 432       //            1,
 433       //            withoutUnicodePtr);
 434       //        }
 435       //        //fill the buffer with the char
 436       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 437       //        return true;
 438       //
 439       //      } //-------------end unicode traitement--------------
 440       //      else {
 441       if (currentCharacter != testedChar) {
 442         currentPosition = temp;
 443         return false;
 444       }
 445       unicodeAsBackSlash = false;
 446       //        if (withoutUnicodePtr != 0)
 447       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 448       return true;
 449       //      }
 450     } catch (IndexOutOfBoundsException e) {
 451       unicodeAsBackSlash = false;
 452       currentPosition = temp;
 453       return false;
 454     }
 455   }
 456
 457   public final int getNextChar(char testedChar1, char testedChar2) {
 458     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 459     //test can be done with (x==0) for the first and (x>0) for the second
 460     //handle the case of unicode.
 461     //when a unicode appears then we must use a buffer that holds char
 462     // internal values
 463     //At the end of this method currentCharacter holds the new visited char
 464     //and currentPosition points right next after it
 465     //Both previous lines are true if the currentCharacter is == to the
 466     // testedChar1/2
 467     //On false, no side effect has occured.
 468     //ALL getNextChar.... ARE OPTIMIZED COPIES
 469     int temp = currentPosition;
 470     try {
 471       int result;
 472       currentCharacter = source[currentPosition++];
 473       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 474       //        && (source[currentPosition] == 'u')) {
 475       //        //-------------unicode traitement ------------
 476       //        int c1, c2, c3, c4;
 477       //        int unicodeSize = 6;
 478       //        currentPosition++;
 479       //        while (source[currentPosition] == 'u') {
 480       //          currentPosition++;
 481       //          unicodeSize++;
 482       //        }
 483       //
 484       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 485       //          || c1 < 0)
 486       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 487       //            || c2 < 0)
 488       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 489       //            || c3 < 0)
 490       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 491       //            || c4 < 0)) {
 492       //          currentPosition = temp;
 493       //          return 2;
 494       //        }
 495       //
 496       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 497       //        if (currentCharacter == testedChar1)
 498       //          result = 0;
 499       //        else if (currentCharacter == testedChar2)
 500       //          result = 1;
 501       //        else {
 502       //          currentPosition = temp;
 503       //          return -1;
 504       //        }
 505       //
 506       //        //need the unicode buffer
 507       //        if (withoutUnicodePtr == 0) {
 508       //          //buffer all the entries that have been left aside....
 509       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 510       //          System.arraycopy(
 511       //            source,
 512       //            startPosition,
 513       //            withoutUnicodeBuffer,
 514       //            1,
 515       //            withoutUnicodePtr);
 516       //        }
 517       //        //fill the buffer with the char
 518       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 519       //        return result;
 520       //      } //-------------end unicode traitement--------------
 521       //      else {
 522       if (currentCharacter == testedChar1)
 523         result = 0;
 524       else if (currentCharacter == testedChar2)
 525         result = 1;
 526       else {
 527         currentPosition = temp;
 528         return -1;
 529       }
 530       //        if (withoutUnicodePtr != 0)
 531       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 532       return result;
 533       //     }
 534     } catch (IndexOutOfBoundsException e) {
 535       currentPosition = temp;
 536       return -1;
 537     }
 538   }
 539
 540   public final boolean getNextCharAsDigit() {
 541     //BOOLEAN
 542     //handle the case of unicode.
 543     //when a unicode appears then we must use a buffer that holds char
 544     // internal values
 545     //At the end of this method currentCharacter holds the new visited char
 546     //and currentPosition points right next after it
 547     //Both previous lines are true if the currentCharacter is a digit
 548     //On false, no side effect has occured.
 549     //ALL getNextChar.... ARE OPTIMIZED COPIES
 550     int temp = currentPosition;
 551     try {
 552       currentCharacter = source[currentPosition++];
 553       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 554       //        && (source[currentPosition] == 'u')) {
 555       //        //-------------unicode traitement ------------
 556       //        int c1, c2, c3, c4;
 557       //        int unicodeSize = 6;
 558       //        currentPosition++;
 559       //        while (source[currentPosition] == 'u') {
 560       //          currentPosition++;
 561       //          unicodeSize++;
 562       //        }
 563       //
 564       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 565       //          || c1 < 0)
 566       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 567       //            || c2 < 0)
 568       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 569       //            || c3 < 0)
 570       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 571       //            || c4 < 0)) {
 572       //          currentPosition = temp;
 573       //          return false;
 574       //        }
 575       //
 576       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 577       //        if (!Character.isDigit(currentCharacter)) {
 578       //          currentPosition = temp;
 579       //          return false;
 580       //        }
 581       //
 582       //        //need the unicode buffer
 583       //        if (withoutUnicodePtr == 0) {
 584       //          //buffer all the entries that have been left aside....
 585       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 586       //          System.arraycopy(
 587       //            source,
 588       //            startPosition,
 589       //            withoutUnicodeBuffer,
 590       //            1,
 591       //            withoutUnicodePtr);
 592       //        }
 593       //        //fill the buffer with the char
 594       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 595       //        return true;
 596       //      } //-------------end unicode traitement--------------
 597       //      else {
 598       if (!Character.isDigit(currentCharacter)) {
 599         currentPosition = temp;
 600         return false;
 601       }
 602       //        if (withoutUnicodePtr != 0)
 603       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 604       return true;
 605       //      }
 606     } catch (IndexOutOfBoundsException e) {
 607       currentPosition = temp;
 608       return false;
 609     }
 610   }
 611
 612   public final boolean getNextCharAsDigit(int radix) {
 613     //BOOLEAN
 614     //handle the case of unicode.
 615     //when a unicode appears then we must use a buffer that holds char
 616     // internal values
 617     //At the end of this method currentCharacter holds the new visited char
 618     //and currentPosition points right next after it
 619     //Both previous lines are true if the currentCharacter is a digit base on
 620     // radix
 621     //On false, no side effect has occured.
 622     //ALL getNextChar.... ARE OPTIMIZED COPIES
 623     int temp = currentPosition;
 624     try {
 625       currentCharacter = source[currentPosition++];
 626       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 627       //        && (source[currentPosition] == 'u')) {
 628       //        //-------------unicode traitement ------------
 629       //        int c1, c2, c3, c4;
 630       //        int unicodeSize = 6;
 631       //        currentPosition++;
 632       //        while (source[currentPosition] == 'u') {
 633       //          currentPosition++;
 634       //          unicodeSize++;
 635       //        }
 636       //
 637       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 638       //          || c1 < 0)
 639       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 640       //            || c2 < 0)
 641       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 642       //            || c3 < 0)
 643       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 644       //            || c4 < 0)) {
 645       //          currentPosition = temp;
 646       //          return false;
 647       //        }
 648       //
 649       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 650       //        if (Character.digit(currentCharacter, radix) == -1) {
 651       //          currentPosition = temp;
 652       //          return false;
 653       //        }
 654       //
 655       //        //need the unicode buffer
 656       //        if (withoutUnicodePtr == 0) {
 657       //          //buffer all the entries that have been left aside....
 658       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 659       //          System.arraycopy(
 660       //            source,
 661       //            startPosition,
 662       //            withoutUnicodeBuffer,
 663       //            1,
 664       //            withoutUnicodePtr);
 665       //        }
 666       //        //fill the buffer with the char
 667       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 668       //        return true;
 669       //      } //-------------end unicode traitement--------------
 670       //      else {
 671       if (Character.digit(currentCharacter, radix) == -1) {
 672         currentPosition = temp;
 673         return false;
 674       }
 675       //        if (withoutUnicodePtr != 0)
 676       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 677       return true;
 678       //      }
 679     } catch (IndexOutOfBoundsException e) {
 680       currentPosition = temp;
 681       return false;
 682     }
 683   }
 684
 685   public boolean getNextCharAsJavaIdentifierPart() {
 686     //BOOLEAN
 687     //handle the case of unicode.
 688     //when a unicode appears then we must use a buffer that holds char
 689     // internal values
 690     //At the end of this method currentCharacter holds the new visited char
 691     //and currentPosition points right next after it
 692     //Both previous lines are true if the currentCharacter is a
 693     // JavaIdentifierPart
 694     //On false, no side effect has occured.
 695     //ALL getNextChar.... ARE OPTIMIZED COPIES
 696     int temp = currentPosition;
 697     try {
 698       currentCharacter = source[currentPosition++];
 699       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 700       //        && (source[currentPosition] == 'u')) {
 701       //        //-------------unicode traitement ------------
 702       //        int c1, c2, c3, c4;
 703       //        int unicodeSize = 6;
 704       //        currentPosition++;
 705       //        while (source[currentPosition] == 'u') {
 706       //          currentPosition++;
 707       //          unicodeSize++;
 708       //        }
 709       //
 710       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 711       //          || c1 < 0)
 712       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 713       //            || c2 < 0)
 714       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 715       //            || c3 < 0)
 716       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 717       //            || c4 < 0)) {
 718       //          currentPosition = temp;
 719       //          return false;
 720       //        }
 721       //
 722       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 723       //        if (!isPHPIdentifierPart(currentCharacter)) {
 724       //          currentPosition = temp;
 725       //          return false;
 726       //        }
 727       //
 728       //        //need the unicode buffer
 729       //        if (withoutUnicodePtr == 0) {
 730       //          //buffer all the entries that have been left aside....
 731       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 732       //          System.arraycopy(
 733       //            source,
 734       //            startPosition,
 735       //            withoutUnicodeBuffer,
 736       //            1,
 737       //            withoutUnicodePtr);
 738       //        }
 739       //        //fill the buffer with the char
 740       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 741       //        return true;
 742       //      } //-------------end unicode traitement--------------
 743       //      else {
 744       if (!isPHPIdentifierPart(currentCharacter)) {
 745         currentPosition = temp;
 746         return false;
 747       }
 748       //        if (withoutUnicodePtr != 0)
 749       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 750       return true;
 751       //      }
 752     } catch (IndexOutOfBoundsException e) {
 753       currentPosition = temp;
 754       return false;
 755     }
 756   }
 757
 758   public int getCastOrParen() {
 759     int tempPosition = currentPosition;
 760     char tempCharacter = currentCharacter;
 761     int tempToken = TokenNameLPAREN;
 762     boolean found = false;
 763     StringBuffer buf = new StringBuffer();
 764     try {
 765       do {
 766         currentCharacter = source[currentPosition++];
 767       } while (currentCharacter == ' ' || currentCharacter == '\t');
 768       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 769         buf.append(currentCharacter);
 770         currentCharacter = source[currentPosition++];
 771       }
 772       if (buf.length() >= 3 && buf.length() <= 7) {
 773         char[] data = buf.toString().toCharArray();
 774         int index = 0;
 775         switch (data.length) {
 776         case 3:
 777           // int
 778           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 779             found = true;
 780             tempToken = TokenNameintCAST;
 781           }
 782           break;
 783         case 4:
 784           // bool real
 785           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 786             found = true;
 787             tempToken = TokenNameboolCAST;
 788           } else {
 789             index = 0;
 790             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 791               found = true;
 792               tempToken = TokenNamedoubleCAST;
 793             }
 794           }
 795           break;
 796         case 5:
 797           // array unset float
 798           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 799               && (data[++index] == 'y')) {
 800             found = true;
 801             tempToken = TokenNamearrayCAST;
 802           } else {
 803             index = 0;
 804             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 805                 && (data[++index] == 't')) {
 806               found = true;
 807               tempToken = TokenNameunsetCAST;
 808             } else {
 809               index = 0;
 810               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 811                   && (data[++index] == 't')) {
 812                 found = true;
 813                 tempToken = TokenNamedoubleCAST;
 814               }
 815             }
 816           }
 817           break;
 818         case 6:
 819           // object string double
 820           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 821               && (data[++index] == 'c') && (data[++index] == 't')) {
 822             found = true;
 823             tempToken = TokenNameobjectCAST;
 824           } else {
 825             index = 0;
 826             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 827                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 828               found = true;
 829               tempToken = TokenNamestringCAST;
 830             } else {
 831               index = 0;
 832               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 833                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 834                 found = true;
 835                 tempToken = TokenNamedoubleCAST;
 836               }
 837             }
 838           }
 839           break;
 840         case 7:
 841           // boolean integer
 842           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 843               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 844             found = true;
 845             tempToken = TokenNameboolCAST;
 846           } else {
 847             index = 0;
 848             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 849                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 850               found = true;
 851               tempToken = TokenNameintCAST;
 852             }
 853           }
 854           break;
 855         }
 856         if (found) {
 857           while (currentCharacter == ' ' || currentCharacter == '\t') {
 858             currentCharacter = source[currentPosition++];
 859           }
 860           if (currentCharacter == ')') {
 861             return tempToken;
 862           }
 863         }
 864       }
 865     } catch (IndexOutOfBoundsException e) {
 866     }
 867     currentCharacter = tempCharacter;
 868     currentPosition = tempPosition;
 869     return TokenNameLPAREN;
 870   }
 871
 872   public void consumeStringInterpolated() throws InvalidInputException {
 873     try {
 874       // consume next character
 875       unicodeAsBackSlash = false;
 876       currentCharacter = source[currentPosition++];
 877       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 878       //                  && (source[currentPosition] == 'u')) {
 879       //                  getNextUnicodeChar();
 880       //                } else {
 881       //                  if (withoutUnicodePtr != 0) {
 882       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 883       //                      currentCharacter;
 884       //                  }
 885       //                }
 886       while (currentCharacter != '`') {
 887         /** ** in PHP \r and \n are valid in string literals *** */
 888         //                if ((currentCharacter == '\n')
 889         //                  || (currentCharacter == '\r')) {
 890         //                  // relocate if finding another quote fairly close: thus unicode
 891         // '/u000D' will be fully consumed
 892         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 893         //                    if (currentPosition + lookAhead == source.length)
 894         //                      break;
 895         //                    if (source[currentPosition + lookAhead] == '\n')
 896         //                      break;
 897         //                    if (source[currentPosition + lookAhead] == '\"') {
 898         //                      currentPosition += lookAhead + 1;
 899         //                      break;
 900         //                    }
 901         //                  }
 902         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 903         //                }
 904         if (currentCharacter == '\\') {
 905           int escapeSize = currentPosition;
 906           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 907           //scanEscapeCharacter make a side effect on this value and we need
 908           // the previous value few lines down this one
 909           scanDoubleQuotedEscapeCharacter();
 910           escapeSize = currentPosition - escapeSize;
 911           if (withoutUnicodePtr == 0) {
 912             //buffer all the entries that have been left aside....
 913             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 914             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 915             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 916           } else { //overwrite the / in the buffer
 917             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 918             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 919               // where only one is correct
 920               withoutUnicodePtr--;
 921             }
 922           }
 923         }
 924         // consume next character
 925         unicodeAsBackSlash = false;
 926         currentCharacter = source[currentPosition++];
 927         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 928         //                    && (source[currentPosition] == 'u')) {
 929         //                    getNextUnicodeChar();
 930         //                  } else {
 931         if (withoutUnicodePtr != 0) {
 932           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 933         }
 934         //                  }
 935       }
 936     } catch (IndexOutOfBoundsException e) {
 937       //    reset end position for error reporting
 938       currentPosition -= 2;
 939       throw new InvalidInputException(UNTERMINATED_STRING);
 940     } catch (InvalidInputException e) {
 941       if (e.getMessage().equals(INVALID_ESCAPE)) {
 942         // relocate if finding another quote fairly close: thus unicode
 943         // '/u000D' will be fully consumed
 944         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 945           if (currentPosition + lookAhead == source.length)
 946             break;
 947           if (source[currentPosition + lookAhead] == '\n')
 948             break;
 949           if (source[currentPosition + lookAhead] == '`') {
 950             currentPosition += lookAhead + 1;
 951             break;
 952           }
 953         }
 954       }
 955       throw e; // rethrow
 956     }
 957     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 958       // //$NON-NLS-?$ where ? is an
 959       // int.
 960       if (currentLine == null) {
 961         currentLine = new NLSLine();
 962         lines.add(currentLine);
 963       }
 964       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 965     }
 966   }
 967
 968   public void consumeStringConstant() throws InvalidInputException {
 969     try {
 970       // consume next character
 971       unicodeAsBackSlash = false;
 972       currentCharacter = source[currentPosition++];
 973       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 974       //                  && (source[currentPosition] == 'u')) {
 975       //                  getNextUnicodeChar();
 976       //                } else {
 977       //                  if (withoutUnicodePtr != 0) {
 978       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 979       //                      currentCharacter;
 980       //                  }
 981       //                }
 982       while (currentCharacter != '\'') {
 983         /** ** in PHP \r and \n are valid in string literals *** */
 984         //                  if ((currentCharacter == '\n')
 985         //                    || (currentCharacter == '\r')) {
 986         //                    // relocate if finding another quote fairly close: thus unicode
 987         // '/u000D' will be fully consumed
 988         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 989         //                      if (currentPosition + lookAhead == source.length)
 990         //                        break;
 991         //                      if (source[currentPosition + lookAhead] == '\n')
 992         //                        break;
 993         //                      if (source[currentPosition + lookAhead] == '\"') {
 994         //                        currentPosition += lookAhead + 1;
 995         //                        break;
 996         //                      }
 997         //                    }
 998         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 999         //                  }
1000         if (currentCharacter == '\\') {
1001           int escapeSize = currentPosition;
1002           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1003           //scanEscapeCharacter make a side effect on this value and we need
1004           // the previous value few lines down this one
1005           scanSingleQuotedEscapeCharacter();
1006           escapeSize = currentPosition - escapeSize;
1007           if (withoutUnicodePtr == 0) {
1008             //buffer all the entries that have been left aside....
1009             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1010             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1011             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1012           } else { //overwrite the / in the buffer
1013             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1014             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1015               // where only one is correct
1016               withoutUnicodePtr--;
1017             }
1018           }
1019         }
1020         // consume next character
1021         unicodeAsBackSlash = false;
1022         currentCharacter = source[currentPosition++];
1023         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1024         //                    && (source[currentPosition] == 'u')) {
1025         //                    getNextUnicodeChar();
1026         //                  } else {
1027         if (withoutUnicodePtr != 0) {
1028           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1029         }
1030         //                  }
1031       }
1032     } catch (IndexOutOfBoundsException e) {
1033       // reset end position for error reporting
1034       currentPosition -= 2;
1035       throw new InvalidInputException(UNTERMINATED_STRING);
1036     } catch (InvalidInputException e) {
1037       if (e.getMessage().equals(INVALID_ESCAPE)) {
1038         // relocate if finding another quote fairly close: thus unicode
1039         // '/u000D' will be fully consumed
1040         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1041           if (currentPosition + lookAhead == source.length)
1042             break;
1043           if (source[currentPosition + lookAhead] == '\n')
1044             break;
1045           if (source[currentPosition + lookAhead] == '\'') {
1046             currentPosition += lookAhead + 1;
1047             break;
1048           }
1049         }
1050       }
1051       throw e; // rethrow
1052     }
1053     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1054       // //$NON-NLS-?$ where ? is an
1055       // int.
1056       if (currentLine == null) {
1057         currentLine = new NLSLine();
1058         lines.add(currentLine);
1059       }
1060       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1061     }
1062   }
1063
1064   public void consumeStringLiteral() throws InvalidInputException {
1065     try {
1066       // consume next character
1067       unicodeAsBackSlash = false;
1068       currentCharacter = source[currentPosition++];
1069       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1070       //                  && (source[currentPosition] == 'u')) {
1071       //                  getNextUnicodeChar();
1072       //                } else {
1073       //                  if (withoutUnicodePtr != 0) {
1074       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1075       //                      currentCharacter;
1076       //                  }
1077       //                }
1078       while (currentCharacter != '"') {
1079         /** ** in PHP \r and \n are valid in string literals *** */
1080         //                  if ((currentCharacter == '\n')
1081         //                    || (currentCharacter == '\r')) {
1082         //                    // relocate if finding another quote fairly close: thus unicode
1083         // '/u000D' will be fully consumed
1084         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1085         //                      if (currentPosition + lookAhead == source.length)
1086         //                        break;
1087         //                      if (source[currentPosition + lookAhead] == '\n')
1088         //                        break;
1089         //                      if (source[currentPosition + lookAhead] == '\"') {
1090         //                        currentPosition += lookAhead + 1;
1091         //                        break;
1092         //                      }
1093         //                    }
1094         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1095         //                  }
1096         if (currentCharacter == '\\') {
1097           int escapeSize = currentPosition;
1098           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1099           //scanEscapeCharacter make a side effect on this value and we need
1100           // the previous value few lines down this one
1101           scanDoubleQuotedEscapeCharacter();
1102           escapeSize = currentPosition - escapeSize;
1103           if (withoutUnicodePtr == 0) {
1104             //buffer all the entries that have been left aside....
1105             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1106             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1107             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1108           } else { //overwrite the / in the buffer
1109             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1110             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1111               // where only one is correct
1112               withoutUnicodePtr--;
1113             }
1114           }
1115         }
1116         // consume next character
1117         unicodeAsBackSlash = false;
1118         currentCharacter = source[currentPosition++];
1119         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1120         //                    && (source[currentPosition] == 'u')) {
1121         //                    getNextUnicodeChar();
1122         //                  } else {
1123         if (withoutUnicodePtr != 0) {
1124           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1125         }
1126         //                  }
1127       }
1128     } catch (IndexOutOfBoundsException e) {
1129       //    reset end position for error reporting
1130       currentPosition -= 2;
1131       throw new InvalidInputException(UNTERMINATED_STRING);
1132     } catch (InvalidInputException e) {
1133       if (e.getMessage().equals(INVALID_ESCAPE)) {
1134         // relocate if finding another quote fairly close: thus unicode
1135         // '/u000D' will be fully consumed
1136         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1137           if (currentPosition + lookAhead == source.length)
1138             break;
1139           if (source[currentPosition + lookAhead] == '\n')
1140             break;
1141           if (source[currentPosition + lookAhead] == '\"') {
1142             currentPosition += lookAhead + 1;
1143             break;
1144           }
1145         }
1146       }
1147       throw e; // rethrow
1148     }
1149     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1150       // //$NON-NLS-?$ where ? is an
1151       // int.
1152       if (currentLine == null) {
1153         currentLine = new NLSLine();
1154         lines.add(currentLine);
1155       }
1156       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1157     }
1158   }
1159
1160   public int getNextToken() throws InvalidInputException {
1161     if (!phpMode) {
1162       return getInlinedHTML(currentPosition);
1163     }
1164     if (phpMode) {
1165       this.wasAcr = false;
1166       if (diet) {
1167         jumpOverMethodBody();
1168         diet = false;
1169         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1170       }
1171       try {
1172         while (true) {
1173           withoutUnicodePtr = 0;
1174           //start with a new token
1175           char encapsedChar = ' ';
1176           if (!encapsedStringStack.isEmpty()) {
1177             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1178           }
1179           if (encapsedChar != '$' && encapsedChar != ' ') {
1180             currentCharacter = source[currentPosition++];
1181             if (currentCharacter == encapsedChar) {
1182               switch (currentCharacter) {
1183               case '`':
1184                 return TokenNameEncapsedString0;
1185               case '\'':
1186                 return TokenNameEncapsedString1;
1187               case '"':
1188                 return TokenNameEncapsedString2;
1189               }
1190             }
1191             while (currentCharacter != encapsedChar) {
1192               /** ** in PHP \r and \n are valid in string literals *** */
1193               switch (currentCharacter) {
1194               case '\\':
1195                 int escapeSize = currentPosition;
1196                 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1197                 //scanEscapeCharacter make a side effect on this value and
1198                 // we need the previous value few lines down this one
1199                 scanDoubleQuotedEscapeCharacter();
1200                 escapeSize = currentPosition - escapeSize;
1201                 if (withoutUnicodePtr == 0) {
1202                   //buffer all the entries that have been left aside....
1203                   withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1204                   System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1205                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1206                 } else { //overwrite the / in the buffer
1207                   withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1208                   if (backSlashAsUnicodeInString) { //there are TWO \ in
1209                     withoutUnicodePtr--;
1210                   }
1211                 }
1212                 break;
1213               case '$':
1214                 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1215                   currentPosition--;
1216                   encapsedStringStack.push(new Character('$'));
1217                   return TokenNameSTRING;
1218                 }
1219                 break;
1220               case '{':
1221                 if (source[currentPosition] == '$') { // CURLY_OPEN
1222                   currentPosition--;
1223                   encapsedStringStack.push(new Character('$'));
1224                   return TokenNameSTRING;
1225                 }
1226               }
1227               // consume next character
1228               unicodeAsBackSlash = false;
1229               currentCharacter = source[currentPosition++];
1230               if (withoutUnicodePtr != 0) {
1231                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1232               }
1233               //                  }
1234             } // end while
1235             currentPosition--;
1236             return TokenNameSTRING;
1237           }
1238           // ---------Consume white space and handles startPosition---------
1239           int whiteStart = currentPosition;
1240           startPosition = currentPosition;
1241           currentCharacter = source[currentPosition++];
1242           if (encapsedChar == '$') {
1243             switch (currentCharacter) {
1244             case '\\':
1245               currentCharacter = source[currentPosition++];
1246               return TokenNameSTRING;
1247             case '{':
1248               if (encapsedChar == '$') {
1249                 if (getNextChar('$'))
1250                   return TokenNameLBRACE_DOLLAR;
1251               }
1252               return TokenNameLBRACE;
1253             case '}':
1254               return TokenNameRBRACE;
1255             case '[':
1256               return TokenNameLBRACKET;
1257             case ']':
1258               return TokenNameRBRACKET;
1259             case '\'':
1260               if (tokenizeStrings) {
1261                 consumeStringConstant();
1262                 return TokenNameStringSingleQuote;
1263               }
1264               return TokenNameEncapsedString1;
1265             case '"':
1266               return TokenNameEncapsedString2;
1267             case '`':
1268               if (tokenizeStrings) {
1269                 consumeStringInterpolated();
1270                 return TokenNameStringInterpolated;
1271               }
1272               return TokenNameEncapsedString0;
1273             case '-':
1274               if (getNextChar('>'))
1275                 return TokenNameMINUS_GREATER;
1276               return TokenNameSTRING;
1277             default:
1278               if (currentCharacter == '$') {
1279                 int oldPosition = currentPosition;
1280                 try {
1281                   currentCharacter = source[currentPosition++];
1282                   if (currentCharacter == '{') {
1283                     return TokenNameDOLLAR_LBRACE;
1284                   }
1285                   if (isPHPIdentifierStart(currentCharacter)) {
1286                     return scanIdentifierOrKeyword(true);
1287                   } else {
1288                     currentPosition = oldPosition;
1289                     return TokenNameSTRING;
1290                   }
1291                 } catch (IndexOutOfBoundsException e) {
1292                   currentPosition = oldPosition;
1293                   return TokenNameSTRING;
1294                 }
1295               }
1296               if (isPHPIdentifierStart(currentCharacter))
1297                 return scanIdentifierOrKeyword(false);
1298               if (Character.isDigit(currentCharacter))
1299                 return scanNumber(false);
1300               return TokenNameERROR;
1301             }
1302           }
1303           //          boolean isWhiteSpace;
1304
1305           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1306             startPosition = currentPosition;
1307             currentCharacter = source[currentPosition++];
1308             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1309             //              && (source[currentPosition] == 'u')) {
1310             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1311             //            } else {
1312             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1313               checkNonExternalizeString();
1314               if (recordLineSeparator) {
1315                 pushLineSeparator();
1316               } else {
1317                 currentLine = null;
1318               }
1319             }
1320             //            isWhiteSpace = (currentCharacter == ' ')
1321             //                || Character.isWhitespace(currentCharacter);
1322             //            }
1323           }
1324           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1325             // reposition scanner in case we are interested by spaces as tokens
1326             currentPosition--;
1327             startPosition = whiteStart;
1328             return TokenNameWHITESPACE;
1329           }
1330           //little trick to get out in the middle of a source compuation
1331           if (currentPosition > eofPosition)
1332             return TokenNameEOF;
1333           // ---------Identify the next token-------------
1334           switch (currentCharacter) {
1335           case '(':
1336             return getCastOrParen();
1337           case ')':
1338             return TokenNameRPAREN;
1339           case '{':
1340             return TokenNameLBRACE;
1341           case '}':
1342             return TokenNameRBRACE;
1343           case '[':
1344             return TokenNameLBRACKET;
1345           case ']':
1346             return TokenNameRBRACKET;
1347           case ';':
1348             return TokenNameSEMICOLON;
1349           case ',':
1350             return TokenNameCOMMA;
1351           case '.':
1352             if (getNextChar('='))
1353               return TokenNameDOT_EQUAL;
1354             if (getNextCharAsDigit())
1355               return scanNumber(true);
1356             return TokenNameDOT;
1357           case '+': {
1358             int test;
1359             if ((test = getNextChar('+', '=')) == 0)
1360               return TokenNamePLUS_PLUS;
1361             if (test > 0)
1362               return TokenNamePLUS_EQUAL;
1363             return TokenNamePLUS;
1364           }
1365           case '-': {
1366             int test;
1367             if ((test = getNextChar('-', '=')) == 0)
1368               return TokenNameMINUS_MINUS;
1369             if (test > 0)
1370               return TokenNameMINUS_EQUAL;
1371             if (getNextChar('>'))
1372               return TokenNameMINUS_GREATER;
1373             return TokenNameMINUS;
1374           }
1375           case '~':
1376             if (getNextChar('='))
1377               return TokenNameTWIDDLE_EQUAL;
1378             return TokenNameTWIDDLE;
1379           case '!':
1380             if (getNextChar('=')) {
1381               if (getNextChar('=')) {
1382                 return TokenNameNOT_EQUAL_EQUAL;
1383               }
1384               return TokenNameNOT_EQUAL;
1385             }
1386             return TokenNameNOT;
1387           case '*':
1388             if (getNextChar('='))
1389               return TokenNameMULTIPLY_EQUAL;
1390             return TokenNameMULTIPLY;
1391           case '%':
1392             if (getNextChar('='))
1393               return TokenNameREMAINDER_EQUAL;
1394             return TokenNameREMAINDER;
1395           case '<': {
1396             int oldPosition = currentPosition;
1397             try {
1398               currentCharacter = source[currentPosition++];
1399             } catch (IndexOutOfBoundsException e) {
1400               currentPosition = oldPosition;
1401               return TokenNameLESS;
1402             }
1403             switch (currentCharacter) {
1404             case '=':
1405               return TokenNameLESS_EQUAL;
1406             case '>':
1407               return TokenNameNOT_EQUAL;
1408             case '<':
1409               if (getNextChar('='))
1410                 return TokenNameLEFT_SHIFT_EQUAL;
1411               if (getNextChar('<')) {
1412                 currentCharacter = source[currentPosition++];
1413                 while (Character.isWhitespace(currentCharacter)) {
1414                   currentCharacter = source[currentPosition++];
1415                 }
1416                 int heredocStart = currentPosition - 1;
1417                 int heredocLength = 0;
1418                 if (isPHPIdentifierStart(currentCharacter)) {
1419                   currentCharacter = source[currentPosition++];
1420                 } else {
1421                   return TokenNameERROR;
1422                 }
1423                 while (isPHPIdentifierPart(currentCharacter)) {
1424                   currentCharacter = source[currentPosition++];
1425                 }
1426                 heredocLength = currentPosition - heredocStart - 1;
1427                 // heredoc end-tag determination
1428                 boolean endTag = true;
1429                 char ch;
1430                 do {
1431                   ch = source[currentPosition++];
1432                   if (ch == '\r' || ch == '\n') {
1433                     if (recordLineSeparator) {
1434                       pushLineSeparator();
1435                     } else {
1436                       currentLine = null;
1437                     }
1438                     for (int i = 0; i < heredocLength; i++) {
1439                       if (source[currentPosition + i] != source[heredocStart + i]) {
1440                         endTag = false;
1441                         break;
1442                       }
1443                     }
1444                     if (endTag) {
1445                       currentPosition += heredocLength - 1;
1446                       currentCharacter = source[currentPosition++];
1447                       break; // do...while loop
1448                     } else {
1449                       endTag = true;
1450                     }
1451                   }
1452                 } while (true);
1453                 return TokenNameHEREDOC;
1454               }
1455               return TokenNameLEFT_SHIFT;
1456             }
1457             currentPosition = oldPosition;
1458             return TokenNameLESS;
1459           }
1460           case '>': {
1461             int test;
1462             if ((test = getNextChar('=', '>')) == 0)
1463               return TokenNameGREATER_EQUAL;
1464             if (test > 0) {
1465               if ((test = getNextChar('=', '>')) == 0)
1466                 return TokenNameRIGHT_SHIFT_EQUAL;
1467               return TokenNameRIGHT_SHIFT;
1468             }
1469             return TokenNameGREATER;
1470           }
1471           case '=':
1472             if (getNextChar('=')) {
1473               if (getNextChar('=')) {
1474                 return TokenNameEQUAL_EQUAL_EQUAL;
1475               }
1476               return TokenNameEQUAL_EQUAL;
1477             }
1478             if (getNextChar('>'))
1479               return TokenNameEQUAL_GREATER;
1480             return TokenNameEQUAL;
1481           case '&': {
1482             int test;
1483             if ((test = getNextChar('&', '=')) == 0)
1484               return TokenNameAND_AND;
1485             if (test > 0)
1486               return TokenNameAND_EQUAL;
1487             return TokenNameAND;
1488           }
1489           case '|': {
1490             int test;
1491             if ((test = getNextChar('|', '=')) == 0)
1492               return TokenNameOR_OR;
1493             if (test > 0)
1494               return TokenNameOR_EQUAL;
1495             return TokenNameOR;
1496           }
1497           case '^':
1498             if (getNextChar('='))
1499               return TokenNameXOR_EQUAL;
1500             return TokenNameXOR;
1501           case '?':
1502             if (getNextChar('>')) {
1503               phpMode = false;
1504               if (currentPosition == source.length) {
1505                 phpMode = true;
1506                 return TokenNameINLINE_HTML;
1507               }
1508               return getInlinedHTML(currentPosition - 2);
1509             }
1510             return TokenNameQUESTION;
1511           case ':':
1512             if (getNextChar(':'))
1513               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1514             return TokenNameCOLON;
1515           case '@':
1516             return TokenNameAT;
1517           case '\'':
1518             consumeStringConstant();
1519             return TokenNameStringSingleQuote;
1520           case '"':
1521             if (tokenizeStrings) {
1522               consumeStringLiteral();
1523               return TokenNameStringDoubleQuote;
1524             }
1525             return TokenNameEncapsedString2;
1526           case '`':
1527             if (tokenizeStrings) {
1528               consumeStringInterpolated();
1529               return TokenNameStringInterpolated;
1530             }
1531             return TokenNameEncapsedString0;
1532           case '#':
1533           case '/': {
1534             char startChar = currentCharacter;
1535             if (getNextChar('=') && startChar=='/') {
1536               return TokenNameDIVIDE_EQUAL;
1537             }
1538             int test;
1539             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1540               //line comment
1541               this.lastCommentLinePosition = this.currentPosition;
1542               int endPositionForLineComment = 0;
1543               try { //get the next char
1544                 currentCharacter = source[currentPosition++];
1545                 //                    if (((currentCharacter = source[currentPosition++])
1546                 //                      == '\\')
1547                 //                      && (source[currentPosition] == 'u')) {
1548                 //                      //-------------unicode traitement ------------
1549                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1550                 //                      currentPosition++;
1551                 //                      while (source[currentPosition] == 'u') {
1552                 //                        currentPosition++;
1553                 //                      }
1554                 //                      if ((c1 =
1555                 //                        Character.getNumericValue(source[currentPosition++]))
1556                 //                        > 15
1557                 //                        || c1 < 0
1558                 //                        || (c2 =
1559                 //                          Character.getNumericValue(source[currentPosition++]))
1560                 //                          > 15
1561                 //                        || c2 < 0
1562                 //                        || (c3 =
1563                 //                          Character.getNumericValue(source[currentPosition++]))
1564                 //                          > 15
1565                 //                        || c3 < 0
1566                 //                        || (c4 =
1567                 //                          Character.getNumericValue(source[currentPosition++]))
1568                 //                          > 15
1569                 //                        || c4 < 0) {
1570                 //                        throw new
1571                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1572                 //                      } else {
1573                 //                        currentCharacter =
1574                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1575                 //                      }
1576                 //                    }
1577                 //handle the \\u case manually into comment
1578                 //                    if (currentCharacter == '\\') {
1579                 //                      if (source[currentPosition] == '\\')
1580                 //                        currentPosition++;
1581                 //                    } //jump over the \\
1582                 boolean isUnicode = false;
1583                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1584                   this.lastCommentLinePosition = this.currentPosition;
1585                   if (currentCharacter == '?') {
1586                     if (getNextChar('>')) {
1587                       startPosition = currentPosition - 2;
1588                       phpMode = false;
1589                       return TokenNameINLINE_HTML;
1590                     }
1591                   }
1592                   //get the next char
1593                   isUnicode = false;
1594                   currentCharacter = source[currentPosition++];
1595                   //                      if (((currentCharacter = source[currentPosition++])
1596                   //                        == '\\')
1597                   //                        && (source[currentPosition] == 'u')) {
1598                   //                        isUnicode = true;
1599                   //                        //-------------unicode traitement ------------
1600                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1601                   //                        currentPosition++;
1602                   //                        while (source[currentPosition] == 'u') {
1603                   //                          currentPosition++;
1604                   //                        }
1605                   //                        if ((c1 =
1606                   //                          Character.getNumericValue(source[currentPosition++]))
1607                   //                          > 15
1608                   //                          || c1 < 0
1609                   //                          || (c2 =
1610                   //                            Character.getNumericValue(
1611                   //                              source[currentPosition++]))
1612                   //                            > 15
1613                   //                          || c2 < 0
1614                   //                          || (c3 =
1615                   //                            Character.getNumericValue(
1616                   //                              source[currentPosition++]))
1617                   //                            > 15
1618                   //                          || c3 < 0
1619                   //                          || (c4 =
1620                   //                            Character.getNumericValue(
1621                   //                              source[currentPosition++]))
1622                   //                            > 15
1623                   //                          || c4 < 0) {
1624                   //                          throw new
1625                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1626                   //                        } else {
1627                   //                          currentCharacter =
1628                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1629                   //                        }
1630                   //                      }
1631                   //handle the \\u case manually into comment
1632                   //                      if (currentCharacter == '\\') {
1633                   //                        if (source[currentPosition] == '\\')
1634                   //                          currentPosition++;
1635                   //                      } //jump over the \\
1636                 }
1637                 if (isUnicode) {
1638                   endPositionForLineComment = currentPosition - 6;
1639                 } else {
1640                   endPositionForLineComment = currentPosition - 1;
1641                 }
1642                 //                    recordComment(false);
1643                 recordComment(TokenNameCOMMENT_LINE);
1644                 if (this.taskTags != null)
1645                   checkTaskTag(this.startPosition, this.currentPosition);
1646                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1647                   checkNonExternalizeString();
1648                   if (recordLineSeparator) {
1649                     if (isUnicode) {
1650                       pushUnicodeLineSeparator();
1651                     } else {
1652                       pushLineSeparator();
1653                     }
1654                   } else {
1655                     currentLine = null;
1656                   }
1657                 }
1658                 if (tokenizeComments) {
1659                   if (!isUnicode) {
1660                     currentPosition = endPositionForLineComment;
1661                     // reset one character behind
1662                   }
1663                   return TokenNameCOMMENT_LINE;
1664                 }
1665               } catch (IndexOutOfBoundsException e) { //an eof will them
1666                 // be generated
1667                 if (tokenizeComments) {
1668                   currentPosition--;
1669                   // reset one character behind
1670                   return TokenNameCOMMENT_LINE;
1671                 }
1672               }
1673               break;
1674             }
1675             if (test > 0) {
1676               //traditional and annotation comment
1677               boolean isJavadoc = false, star = false;
1678               // consume next character
1679               unicodeAsBackSlash = false;
1680               currentCharacter = source[currentPosition++];
1681               //                  if (((currentCharacter = source[currentPosition++]) ==
1682               // '\\')
1683               //                    && (source[currentPosition] == 'u')) {
1684               //                    getNextUnicodeChar();
1685               //                  } else {
1686               //                    if (withoutUnicodePtr != 0) {
1687               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1688               //                        currentCharacter;
1689               //                    }
1690               //                  }
1691               if (currentCharacter == '*') {
1692                 isJavadoc = true;
1693                 star = true;
1694               }
1695               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1696                 checkNonExternalizeString();
1697                 if (recordLineSeparator) {
1698                   pushLineSeparator();
1699                 } else {
1700                   currentLine = null;
1701                 }
1702               }
1703               try { //get the next char
1704                 currentCharacter = source[currentPosition++];
1705                 //                    if (((currentCharacter = source[currentPosition++])
1706                 //                      == '\\')
1707                 //                      && (source[currentPosition] == 'u')) {
1708                 //                      //-------------unicode traitement ------------
1709                 //                      getNextUnicodeChar();
1710                 //                    }
1711                 //handle the \\u case manually into comment
1712                 //                    if (currentCharacter == '\\') {
1713                 //                      if (source[currentPosition] == '\\')
1714                 //                        currentPosition++;
1715                 //                      //jump over the \\
1716                 //                    }
1717                 // empty comment is not a javadoc /**/
1718                 if (currentCharacter == '/') {
1719                   isJavadoc = false;
1720                 }
1721                 //loop until end of comment */
1722                 while ((currentCharacter != '/') || (!star)) {
1723                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1724                     checkNonExternalizeString();
1725                     if (recordLineSeparator) {
1726                       pushLineSeparator();
1727                     } else {
1728                       currentLine = null;
1729                     }
1730                   }
1731                   star = currentCharacter == '*';
1732                   //get next char
1733                   currentCharacter = source[currentPosition++];
1734                   //                      if (((currentCharacter = source[currentPosition++])
1735                   //                        == '\\')
1736                   //                        && (source[currentPosition] == 'u')) {
1737                   //                        //-------------unicode traitement ------------
1738                   //                        getNextUnicodeChar();
1739                   //                      }
1740                   //handle the \\u case manually into comment
1741                   //                      if (currentCharacter == '\\') {
1742                   //                        if (source[currentPosition] == '\\')
1743                   //                          currentPosition++;
1744                   //                      } //jump over the \\
1745                 }
1746                 //recordComment(isJavadoc);
1747                 if (isJavadoc) {
1748                   recordComment(TokenNameCOMMENT_PHPDOC);
1749                 } else {
1750                   recordComment(TokenNameCOMMENT_BLOCK);
1751                 }
1752
1753                 if (tokenizeComments) {
1754                   if (isJavadoc)
1755                     return TokenNameCOMMENT_PHPDOC;
1756                   return TokenNameCOMMENT_BLOCK;
1757                 }
1758
1759                 if (this.taskTags != null) {
1760                   checkTaskTag(this.startPosition, this.currentPosition);
1761                 }
1762               } catch (IndexOutOfBoundsException e) {
1763                 //                  reset end position for error reporting
1764                 currentPosition -= 2;
1765                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1766               }
1767               break;
1768             }
1769             return TokenNameDIVIDE;
1770           }
1771           case '\u001a':
1772             if (atEnd())
1773               return TokenNameEOF;
1774             //the atEnd may not be <currentPosition == source.length> if
1775             // source is only some part of a real (external) stream
1776             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1777           default:
1778             if (currentCharacter == '$') {
1779               int oldPosition = currentPosition;
1780               try {
1781                 currentCharacter = source[currentPosition++];
1782                 if (isPHPIdentifierStart(currentCharacter)) {
1783                   return scanIdentifierOrKeyword(true);
1784                 } else {
1785                   currentPosition = oldPosition;
1786                   return TokenNameDOLLAR;
1787                 }
1788               } catch (IndexOutOfBoundsException e) {
1789                 currentPosition = oldPosition;
1790                 return TokenNameDOLLAR;
1791               }
1792             }
1793             if (isPHPIdentifierStart(currentCharacter))
1794               return scanIdentifierOrKeyword(false);
1795             if (Character.isDigit(currentCharacter))
1796               return scanNumber(false);
1797             return TokenNameERROR;
1798           }
1799         }
1800       } //-----------------end switch while try--------------------
1801       catch (IndexOutOfBoundsException e) {
1802       }
1803     }
1804     return TokenNameEOF;
1805   }
1806
1807   private int getInlinedHTML(int start) throws InvalidInputException {
1808     int token = getInlinedHTMLToken(start);
1809     if (token == TokenNameINLINE_HTML) {
1810       //                Stack stack = new Stack();
1811       //                // scan html for errors
1812       //                Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1813       //                int lastPHPEndPos=0;
1814       //                for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1815       //                    Tag tag=(Tag)i.next();
1816       //
1817       //                    if (tag instanceof StartTag) {
1818       //                        StartTag startTag=(StartTag)tag;
1819       //                      // System.out.println("startTag: "+tag);
1820       //                        if (startTag.isServerTag()) {
1821       //                          // TODO : what to do with a server tag ?
1822       //                        } else {
1823       //                            // do whatever with HTML start tag
1824       //                            // use startTag.getElement() to find the element corresponding
1825       //                            // to this start tag which may be useful if you implement code
1826       //                            // folding etc
1827       //                                stack.push(startTag);
1828       //                        }
1829       //                    } else {
1830       //                        EndTag endTag=(EndTag)tag;
1831       //                        StartTag stag = (StartTag) stack.peek();
1832       //// System.out.println("endTag: "+tag);
1833       //                        // do whatever with HTML end tag.
1834       //                    }
1835       //                }
1836     }
1837     return token;
1838   }
1839
1840   /**
1841    * @return
1842    * @throws InvalidInputException
1843    */
1844   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1845     //    int htmlPosition = start;
1846     if (currentPosition > source.length) {
1847       currentPosition = source.length;
1848       return TokenNameEOF;
1849     }
1850     startPosition = start;
1851     try {
1852       while (!phpMode) {
1853         currentCharacter = source[currentPosition++];
1854         if (currentCharacter == '<') {
1855           if (getNextChar('?')) {
1856             currentCharacter = source[currentPosition++];
1857             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1858               // <?
1859               if (ignorePHPOneLiner) {
1860                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1861                   phpMode = true;
1862                   return TokenNameINLINE_HTML;
1863                 }
1864               } else {
1865                 phpMode = true;
1866                 return TokenNameINLINE_HTML;
1867               }
1868             } else {
1869               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1870               if (phpStart) {
1871                 int test = getNextChar('H', 'h');
1872                 if (test >= 0) {
1873                   test = getNextChar('P', 'p');
1874                   if (test >= 0) {
1875                     // <?PHP <?php
1876                     if (ignorePHPOneLiner) {
1877                       if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1878                         phpMode = true;
1879                         return TokenNameINLINE_HTML;
1880                       }
1881                     } else {
1882                       phpMode = true;
1883                       return TokenNameINLINE_HTML;
1884                     }
1885                   }
1886                 }
1887               }
1888             }
1889           }
1890         }
1891         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1892           if (recordLineSeparator) {
1893             pushLineSeparator();
1894           } else {
1895             currentLine = null;
1896           }
1897         }
1898       } //-----------------while--------------------
1899       phpMode = true;
1900       return TokenNameINLINE_HTML;
1901     } //-----------------try--------------------
1902     catch (IndexOutOfBoundsException e) {
1903       startPosition = start;
1904       currentPosition--;
1905     }
1906     phpMode = true;
1907     return TokenNameINLINE_HTML;
1908   }
1909
1910   /**
1911    * @return
1912    */
1913   private int lookAheadLinePHPTag() {
1914     // check if the PHP is only in this line (for CodeFormatter)
1915     int currentPositionInLine = currentPosition;
1916     char previousCharInLine = ' ';
1917     char currentCharInLine = ' ';
1918     boolean singleQuotedStringActive = false;
1919     boolean doubleQuotedStringActive = false;
1920
1921     try {
1922       // look ahead in this line
1923       while (true) {
1924         previousCharInLine = currentCharInLine;
1925         currentCharInLine = source[currentPositionInLine++];
1926         switch (currentCharInLine) {
1927         case '>':
1928           if (previousCharInLine == '?') {
1929             // update the scanner's current Position in the source
1930             currentPosition = currentPositionInLine;
1931             // use as "dummy" token
1932             return TokenNameEOF;
1933           }
1934           break;
1935         case '\"':
1936           if (doubleQuotedStringActive) {
1937             if (previousCharInLine != '\\') {
1938               doubleQuotedStringActive = false;
1939             }
1940           } else {
1941             if (!singleQuotedStringActive) {
1942               doubleQuotedStringActive = true;
1943             }
1944           }
1945           break;
1946         case '\'':
1947           if (singleQuotedStringActive) {
1948             if (previousCharInLine != '\\') {
1949               singleQuotedStringActive = false;
1950             }
1951           } else {
1952             if (!doubleQuotedStringActive) {
1953               singleQuotedStringActive = true;
1954             }
1955           }
1956           break;
1957         case '\n':
1958           phpMode = true;
1959           return TokenNameINLINE_HTML;
1960         case '#':
1961           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1962             phpMode = true;
1963             return TokenNameINLINE_HTML;
1964           }
1965           break;
1966         case '/':
1967           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1968             phpMode = true;
1969             return TokenNameINLINE_HTML;
1970           }
1971           break;
1972         case '*':
1973           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1974             phpMode = true;
1975             return TokenNameINLINE_HTML;
1976           }
1977           break;
1978         }
1979       }
1980     } catch (IndexOutOfBoundsException e) {
1981       phpMode = true;
1982       currentPosition = currentPositionInLine;
1983       return TokenNameINLINE_HTML;
1984     }
1985   }
1986
1987   //  public final void getNextUnicodeChar()
1988   //    throws IndexOutOfBoundsException, InvalidInputException {
1989   //    //VOID
1990   //    //handle the case of unicode.
1991   //    //when a unicode appears then we must use a buffer that holds char
1992   // internal values
1993   //    //At the end of this method currentCharacter holds the new visited char
1994   //    //and currentPosition points right next after it
1995   //
1996   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1997   //
1998   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1999   //    currentPosition++;
2000   //    while (source[currentPosition] == 'u') {
2001   //      currentPosition++;
2002   //      unicodeSize++;
2003   //    }
2004   //
2005   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2006   //      || c1 < 0
2007   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2008   //      || c2 < 0
2009   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2010   //      || c3 < 0
2011   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2012   //      || c4 < 0) {
2013   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2014   //    } else {
2015   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2016   //      //need the unicode buffer
2017   //      if (withoutUnicodePtr == 0) {
2018   //        //buffer all the entries that have been left aside....
2019   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2020   //        System.arraycopy(
2021   //          source,
2022   //          startPosition,
2023   //          withoutUnicodeBuffer,
2024   //          1,
2025   //          withoutUnicodePtr);
2026   //      }
2027   //      //fill the buffer with the char
2028   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2029   //    }
2030   //    unicodeAsBackSlash = currentCharacter == '\\';
2031   //  }
2032   /*
2033    * Tokenize a method body, assuming that curly brackets are properly balanced.
2034    */
2035   public final void jumpOverMethodBody() {
2036     this.wasAcr = false;
2037     int found = 1;
2038     try {
2039       while (true) { //loop for jumping over comments
2040         // ---------Consume white space and handles startPosition---------
2041         boolean isWhiteSpace;
2042         do {
2043           startPosition = currentPosition;
2044           currentCharacter = source[currentPosition++];
2045           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2046           //            && (source[currentPosition] == 'u')) {
2047           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2048           //          } else {
2049           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2050             pushLineSeparator();
2051           isWhiteSpace = Character.isWhitespace(currentCharacter);
2052           //          }
2053         } while (isWhiteSpace);
2054         // -------consume token until } is found---------
2055         switch (currentCharacter) {
2056         case '{':
2057           found++;
2058           break;
2059         case '}':
2060           found--;
2061           if (found == 0)
2062             return;
2063           break;
2064         case '\'': {
2065           boolean test;
2066           test = getNextChar('\\');
2067           if (test) {
2068             try {
2069               scanDoubleQuotedEscapeCharacter();
2070             } catch (InvalidInputException ex) {
2071             }
2072             ;
2073           } else {
2074             //                try { // consume next character
2075             unicodeAsBackSlash = false;
2076             currentCharacter = source[currentPosition++];
2077             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2078             //                    && (source[currentPosition] == 'u')) {
2079             //                    getNextUnicodeChar();
2080             //                  } else {
2081             if (withoutUnicodePtr != 0) {
2082               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2083             }
2084             //                  }
2085             //                } catch (InvalidInputException ex) {
2086             //                };
2087           }
2088           getNextChar('\'');
2089           break;
2090         }
2091         case '"':
2092           try {
2093             //              try { // consume next character
2094             unicodeAsBackSlash = false;
2095             currentCharacter = source[currentPosition++];
2096             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2097             //                  && (source[currentPosition] == 'u')) {
2098             //                  getNextUnicodeChar();
2099             //                } else {
2100             if (withoutUnicodePtr != 0) {
2101               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2102             }
2103             //                }
2104             //              } catch (InvalidInputException ex) {
2105             //              };
2106             while (currentCharacter != '"') {
2107               if (currentCharacter == '\r') {
2108                 if (source[currentPosition] == '\n')
2109                   currentPosition++;
2110                 break;
2111                 // the string cannot go further that the line
2112               }
2113               if (currentCharacter == '\n') {
2114                 break;
2115                 // the string cannot go further that the line
2116               }
2117               if (currentCharacter == '\\') {
2118                 try {
2119                   scanDoubleQuotedEscapeCharacter();
2120                 } catch (InvalidInputException ex) {
2121                 }
2122                 ;
2123               }
2124               //                try { // consume next character
2125               unicodeAsBackSlash = false;
2126               currentCharacter = source[currentPosition++];
2127               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2128               //                    && (source[currentPosition] == 'u')) {
2129               //                    getNextUnicodeChar();
2130               //                  } else {
2131               if (withoutUnicodePtr != 0) {
2132                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2133               }
2134               //                  }
2135               //                } catch (InvalidInputException ex) {
2136               //                };
2137             }
2138           } catch (IndexOutOfBoundsException e) {
2139             return;
2140           }
2141           break;
2142         case '/': {
2143           int test;
2144           if ((test = getNextChar('/', '*')) == 0) {
2145             //line comment
2146             try {
2147               //get the next char
2148               currentCharacter = source[currentPosition++];
2149               //                  if (((currentCharacter = source[currentPosition++]) ==
2150               // '\\')
2151               //                    && (source[currentPosition] == 'u')) {
2152               //                    //-------------unicode traitement ------------
2153               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2154               //                    currentPosition++;
2155               //                    while (source[currentPosition] == 'u') {
2156               //                      currentPosition++;
2157               //                    }
2158               //                    if ((c1 =
2159               //                      Character.getNumericValue(source[currentPosition++]))
2160               //                      > 15
2161               //                      || c1 < 0
2162               //                      || (c2 =
2163               //                        Character.getNumericValue(source[currentPosition++]))
2164               //                        > 15
2165               //                      || c2 < 0
2166               //                      || (c3 =
2167               //                        Character.getNumericValue(source[currentPosition++]))
2168               //                        > 15
2169               //                      || c3 < 0
2170               //                      || (c4 =
2171               //                        Character.getNumericValue(source[currentPosition++]))
2172               //                        > 15
2173               //                      || c4 < 0) {
2174               //                      //error don't care of the value
2175               //                      currentCharacter = 'A';
2176               //                    } //something different from \n and \r
2177               //                    else {
2178               //                      currentCharacter =
2179               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2180               //                    }
2181               //                  }
2182               while (currentCharacter != '\r' && currentCharacter != '\n') {
2183                 //get the next char
2184                 currentCharacter = source[currentPosition++];
2185                 //                    if (((currentCharacter = source[currentPosition++])
2186                 //                      == '\\')
2187                 //                      && (source[currentPosition] == 'u')) {
2188                 //                      //-------------unicode traitement ------------
2189                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2190                 //                      currentPosition++;
2191                 //                      while (source[currentPosition] == 'u') {
2192                 //                        currentPosition++;
2193                 //                      }
2194                 //                      if ((c1 =
2195                 //                        Character.getNumericValue(source[currentPosition++]))
2196                 //                        > 15
2197                 //                        || c1 < 0
2198                 //                        || (c2 =
2199                 //                          Character.getNumericValue(source[currentPosition++]))
2200                 //                          > 15
2201                 //                        || c2 < 0
2202                 //                        || (c3 =
2203                 //                          Character.getNumericValue(source[currentPosition++]))
2204                 //                          > 15
2205                 //                        || c3 < 0
2206                 //                        || (c4 =
2207                 //                          Character.getNumericValue(source[currentPosition++]))
2208                 //                          > 15
2209                 //                        || c4 < 0) {
2210                 //                        //error don't care of the value
2211                 //                        currentCharacter = 'A';
2212                 //                      } //something different from \n and \r
2213                 //                      else {
2214                 //                        currentCharacter =
2215                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2216                 //                      }
2217                 //                    }
2218               }
2219               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2220                 pushLineSeparator();
2221             } catch (IndexOutOfBoundsException e) {
2222             } //an eof will them be generated
2223             break;
2224           }
2225           if (test > 0) {
2226             //traditional and annotation comment
2227             boolean star = false;
2228             //                try { // consume next character
2229             unicodeAsBackSlash = false;
2230             currentCharacter = source[currentPosition++];
2231             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2232             //                    && (source[currentPosition] == 'u')) {
2233             //                    getNextUnicodeChar();
2234             //                  } else {
2235             if (withoutUnicodePtr != 0) {
2236               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2237             }
2238             //                  };
2239             //                } catch (InvalidInputException ex) {
2240             //                };
2241             if (currentCharacter == '*') {
2242               star = true;
2243             }
2244             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2245               pushLineSeparator();
2246             try { //get the next char
2247               currentCharacter = source[currentPosition++];
2248               //                  if (((currentCharacter = source[currentPosition++]) ==
2249               // '\\')
2250               //                    && (source[currentPosition] == 'u')) {
2251               //                    //-------------unicode traitement ------------
2252               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2253               //                    currentPosition++;
2254               //                    while (source[currentPosition] == 'u') {
2255               //                      currentPosition++;
2256               //                    }
2257               //                    if ((c1 =
2258               //                      Character.getNumericValue(source[currentPosition++]))
2259               //                      > 15
2260               //                      || c1 < 0
2261               //                      || (c2 =
2262               //                        Character.getNumericValue(source[currentPosition++]))
2263               //                        > 15
2264               //                      || c2 < 0
2265               //                      || (c3 =
2266               //                        Character.getNumericValue(source[currentPosition++]))
2267               //                        > 15
2268               //                      || c3 < 0
2269               //                      || (c4 =
2270               //                        Character.getNumericValue(source[currentPosition++]))
2271               //                        > 15
2272               //                      || c4 < 0) {
2273               //                      //error don't care of the value
2274               //                      currentCharacter = 'A';
2275               //                    } //something different from * and /
2276               //                    else {
2277               //                      currentCharacter =
2278               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2279               //                    }
2280               //                  }
2281               //loop until end of comment */
2282               while ((currentCharacter != '/') || (!star)) {
2283                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2284                   pushLineSeparator();
2285                 star = currentCharacter == '*';
2286                 //get next char
2287                 currentCharacter = source[currentPosition++];
2288                 //                    if (((currentCharacter = source[currentPosition++])
2289                 //                      == '\\')
2290                 //                      && (source[currentPosition] == 'u')) {
2291                 //                      //-------------unicode traitement ------------
2292                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2293                 //                      currentPosition++;
2294                 //                      while (source[currentPosition] == 'u') {
2295                 //                        currentPosition++;
2296                 //                      }
2297                 //                      if ((c1 =
2298                 //                        Character.getNumericValue(source[currentPosition++]))
2299                 //                        > 15
2300                 //                        || c1 < 0
2301                 //                        || (c2 =
2302                 //                          Character.getNumericValue(source[currentPosition++]))
2303                 //                          > 15
2304                 //                        || c2 < 0
2305                 //                        || (c3 =
2306                 //                          Character.getNumericValue(source[currentPosition++]))
2307                 //                          > 15
2308                 //                        || c3 < 0
2309                 //                        || (c4 =
2310                 //                          Character.getNumericValue(source[currentPosition++]))
2311                 //                          > 15
2312                 //                        || c4 < 0) {
2313                 //                        //error don't care of the value
2314                 //                        currentCharacter = 'A';
2315                 //                      } //something different from * and /
2316                 //                      else {
2317                 //                        currentCharacter =
2318                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2319                 //                      }
2320                 //                    }
2321               }
2322             } catch (IndexOutOfBoundsException e) {
2323               return;
2324             }
2325             break;
2326           }
2327           break;
2328         }
2329         default:
2330           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2331             try {
2332               scanIdentifierOrKeyword((currentCharacter == '$'));
2333             } catch (InvalidInputException ex) {
2334             }
2335             ;
2336             break;
2337           }
2338           if (Character.isDigit(currentCharacter)) {
2339             try {
2340               scanNumber(false);
2341             } catch (InvalidInputException ex) {
2342             }
2343             ;
2344             break;
2345           }
2346         }
2347       }
2348       //-----------------end switch while try--------------------
2349     } catch (IndexOutOfBoundsException e) {
2350     } catch (InvalidInputException e) {
2351     }
2352     return;
2353   }
2354
2355   //  public final boolean jumpOverUnicodeWhiteSpace()
2356   //    throws InvalidInputException {
2357   //    //BOOLEAN
2358   //    //handle the case of unicode. Jump over the next whiteSpace
2359   //    //making startPosition pointing on the next available char
2360   //    //On false, the currentCharacter is filled up with a potential
2361   //    //correct char
2362   //
2363   //    try {
2364   //      this.wasAcr = false;
2365   //      int c1, c2, c3, c4;
2366   //      int unicodeSize = 6;
2367   //      currentPosition++;
2368   //      while (source[currentPosition] == 'u') {
2369   //        currentPosition++;
2370   //        unicodeSize++;
2371   //      }
2372   //
2373   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2374   //        || c1 < 0)
2375   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2376   //          || c2 < 0)
2377   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2378   //          || c3 < 0)
2379   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2380   //          || c4 < 0)) {
2381   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2382   //      }
2383   //
2384   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2385   //      if (recordLineSeparator
2386   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2387   //        pushLineSeparator();
2388   //      if (Character.isWhitespace(currentCharacter))
2389   //        return true;
2390   //
2391   //      //buffer the new char which is not a white space
2392   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2393   //      //withoutUnicodePtr == 1 is true here
2394   //      return false;
2395   //    } catch (IndexOutOfBoundsException e) {
2396   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2397   //    }
2398   //  }
2399   public final int[] getLineEnds() {
2400     //return a bounded copy of this.lineEnds
2401     int[] copy;
2402     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2403     return copy;
2404   }
2405
2406   public char[] getSource() {
2407     return this.source;
2408   }
2409
2410   public static boolean isIdentifierOrKeyword(int token) {
2411     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2412   }
2413
2414   final char[] optimizedCurrentTokenSource1() {
2415     //return always the same char[] build only once
2416     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2417     char charOne = source[startPosition];
2418     switch (charOne) {
2419     case 'a':
2420       return charArray_a;
2421     case 'b':
2422       return charArray_b;
2423     case 'c':
2424       return charArray_c;
2425     case 'd':
2426       return charArray_d;
2427     case 'e':
2428       return charArray_e;
2429     case 'f':
2430       return charArray_f;
2431     case 'g':
2432       return charArray_g;
2433     case 'h':
2434       return charArray_h;
2435     case 'i':
2436       return charArray_i;
2437     case 'j':
2438       return charArray_j;
2439     case 'k':
2440       return charArray_k;
2441     case 'l':
2442       return charArray_l;
2443     case 'm':
2444       return charArray_m;
2445     case 'n':
2446       return charArray_n;
2447     case 'o':
2448       return charArray_o;
2449     case 'p':
2450       return charArray_p;
2451     case 'q':
2452       return charArray_q;
2453     case 'r':
2454       return charArray_r;
2455     case 's':
2456       return charArray_s;
2457     case 't':
2458       return charArray_t;
2459     case 'u':
2460       return charArray_u;
2461     case 'v':
2462       return charArray_v;
2463     case 'w':
2464       return charArray_w;
2465     case 'x':
2466       return charArray_x;
2467     case 'y':
2468       return charArray_y;
2469     case 'z':
2470       return charArray_z;
2471     default:
2472       return new char[] { charOne };
2473     }
2474   }
2475
2476   final char[] optimizedCurrentTokenSource2() {
2477     //try to return the same char[] build only once
2478     char c0, c1;
2479     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2480     char[][] table = charArray_length[0][hash];
2481     int i = newEntry2;
2482     while (++i < InternalTableSize) {
2483       char[] charArray = table[i];
2484       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2485         return charArray;
2486     }
2487     //---------other side---------
2488     i = -1;
2489     int max = newEntry2;
2490     while (++i <= max) {
2491       char[] charArray = table[i];
2492       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2493         return charArray;
2494     }
2495     //--------add the entry-------
2496     if (++max >= InternalTableSize)
2497       max = 0;
2498     char[] r;
2499     table[max] = (r = new char[] { c0, c1 });
2500     newEntry2 = max;
2501     return r;
2502   }
2503
2504   final char[] optimizedCurrentTokenSource3() {
2505     //try to return the same char[] build only once
2506     char c0, c1, c2;
2507     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2508         % TableSize;
2509     char[][] table = charArray_length[1][hash];
2510     int i = newEntry3;
2511     while (++i < InternalTableSize) {
2512       char[] charArray = table[i];
2513       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2514         return charArray;
2515     }
2516     //---------other side---------
2517     i = -1;
2518     int max = newEntry3;
2519     while (++i <= max) {
2520       char[] charArray = table[i];
2521       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2522         return charArray;
2523     }
2524     //--------add the entry-------
2525     if (++max >= InternalTableSize)
2526       max = 0;
2527     char[] r;
2528     table[max] = (r = new char[] { c0, c1, c2 });
2529     newEntry3 = max;
2530     return r;
2531   }
2532
2533   final char[] optimizedCurrentTokenSource4() {
2534     //try to return the same char[] build only once
2535     char c0, c1, c2, c3;
2536     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2537         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2538         % TableSize;
2539     char[][] table = charArray_length[2][(int) hash];
2540     int i = newEntry4;
2541     while (++i < InternalTableSize) {
2542       char[] charArray = table[i];
2543       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2544         return charArray;
2545     }
2546     //---------other side---------
2547     i = -1;
2548     int max = newEntry4;
2549     while (++i <= max) {
2550       char[] charArray = table[i];
2551       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2552         return charArray;
2553     }
2554     //--------add the entry-------
2555     if (++max >= InternalTableSize)
2556       max = 0;
2557     char[] r;
2558     table[max] = (r = new char[] { c0, c1, c2, c3 });
2559     newEntry4 = max;
2560     return r;
2561   }
2562
2563   final char[] optimizedCurrentTokenSource5() {
2564     //try to return the same char[] build only once
2565     char c0, c1, c2, c3, c4;
2566     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2567         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2568         % TableSize;
2569     char[][] table = charArray_length[3][(int) hash];
2570     int i = newEntry5;
2571     while (++i < InternalTableSize) {
2572       char[] charArray = table[i];
2573       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2574         return charArray;
2575     }
2576     //---------other side---------
2577     i = -1;
2578     int max = newEntry5;
2579     while (++i <= max) {
2580       char[] charArray = table[i];
2581       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2582         return charArray;
2583     }
2584     //--------add the entry-------
2585     if (++max >= InternalTableSize)
2586       max = 0;
2587     char[] r;
2588     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2589     newEntry5 = max;
2590     return r;
2591   }
2592
2593   final char[] optimizedCurrentTokenSource6() {
2594     //try to return the same char[] build only once
2595     char c0, c1, c2, c3, c4, c5;
2596     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2597         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2598         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2599         % TableSize;
2600     char[][] table = charArray_length[4][(int) hash];
2601     int i = newEntry6;
2602     while (++i < InternalTableSize) {
2603       char[] charArray = table[i];
2604       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2605           && (c5 == charArray[5]))
2606         return charArray;
2607     }
2608     //---------other side---------
2609     i = -1;
2610     int max = newEntry6;
2611     while (++i <= max) {
2612       char[] charArray = table[i];
2613       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2614           && (c5 == charArray[5]))
2615         return charArray;
2616     }
2617     //--------add the entry-------
2618     if (++max >= InternalTableSize)
2619       max = 0;
2620     char[] r;
2621     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2622     newEntry6 = max;
2623     return r;
2624   }
2625
2626   public final void pushLineSeparator() throws InvalidInputException {
2627     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2628     final int INCREMENT = 250;
2629     if (this.checkNonExternalizedStringLiterals) {
2630       // reinitialize the current line for non externalize strings purpose
2631       currentLine = null;
2632     }
2633     //currentCharacter is at position currentPosition-1
2634     // cr 000D
2635     if (currentCharacter == '\r') {
2636       int separatorPos = currentPosition - 1;
2637       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2638         return;
2639       //System.out.println("CR-" + separatorPos);
2640       try {
2641         lineEnds[++linePtr] = separatorPos;
2642       } catch (IndexOutOfBoundsException e) {
2643         //linePtr value is correct
2644         int oldLength = lineEnds.length;
2645         int[] old = lineEnds;
2646         lineEnds = new int[oldLength + INCREMENT];
2647         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2648         lineEnds[linePtr] = separatorPos;
2649       }
2650       // look-ahead for merged cr+lf
2651       try {
2652         if (source[currentPosition] == '\n') {
2653           //System.out.println("look-ahead LF-" + currentPosition);
2654           lineEnds[linePtr] = currentPosition;
2655           currentPosition++;
2656           wasAcr = false;
2657         } else {
2658           wasAcr = true;
2659         }
2660       } catch (IndexOutOfBoundsException e) {
2661         wasAcr = true;
2662       }
2663     } else {
2664       // lf 000A
2665       if (currentCharacter == '\n') {
2666         //must merge eventual cr followed by lf
2667         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2668           //System.out.println("merge LF-" + (currentPosition - 1));
2669           lineEnds[linePtr] = currentPosition - 1;
2670         } else {
2671           int separatorPos = currentPosition - 1;
2672           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2673             return;
2674           // System.out.println("LF-" + separatorPos);
2675           try {
2676             lineEnds[++linePtr] = separatorPos;
2677           } catch (IndexOutOfBoundsException e) {
2678             //linePtr value is correct
2679             int oldLength = lineEnds.length;
2680             int[] old = lineEnds;
2681             lineEnds = new int[oldLength + INCREMENT];
2682             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2683             lineEnds[linePtr] = separatorPos;
2684           }
2685         }
2686         wasAcr = false;
2687       }
2688     }
2689   }
2690
2691   public final void pushUnicodeLineSeparator() {
2692     // isUnicode means that the \r or \n has been read as a unicode character
2693     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2694     final int INCREMENT = 250;
2695     //currentCharacter is at position currentPosition-1
2696     if (this.checkNonExternalizedStringLiterals) {
2697       // reinitialize the current line for non externalize strings purpose
2698       currentLine = null;
2699     }
2700     // cr 000D
2701     if (currentCharacter == '\r') {
2702       int separatorPos = currentPosition - 6;
2703       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2704         return;
2705       //System.out.println("CR-" + separatorPos);
2706       try {
2707         lineEnds[++linePtr] = separatorPos;
2708       } catch (IndexOutOfBoundsException e) {
2709         //linePtr value is correct
2710         int oldLength = lineEnds.length;
2711         int[] old = lineEnds;
2712         lineEnds = new int[oldLength + INCREMENT];
2713         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2714         lineEnds[linePtr] = separatorPos;
2715       }
2716       // look-ahead for merged cr+lf
2717       if (source[currentPosition] == '\n') {
2718         //System.out.println("look-ahead LF-" + currentPosition);
2719         lineEnds[linePtr] = currentPosition;
2720         currentPosition++;
2721         wasAcr = false;
2722       } else {
2723         wasAcr = true;
2724       }
2725     } else {
2726       // lf 000A
2727       if (currentCharacter == '\n') {
2728         //must merge eventual cr followed by lf
2729         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2730           //System.out.println("merge LF-" + (currentPosition - 1));
2731           lineEnds[linePtr] = currentPosition - 6;
2732         } else {
2733           int separatorPos = currentPosition - 6;
2734           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2735             return;
2736           // System.out.println("LF-" + separatorPos);
2737           try {
2738             lineEnds[++linePtr] = separatorPos;
2739           } catch (IndexOutOfBoundsException e) {
2740             //linePtr value is correct
2741             int oldLength = lineEnds.length;
2742             int[] old = lineEnds;
2743             lineEnds = new int[oldLength + INCREMENT];
2744             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2745             lineEnds[linePtr] = separatorPos;
2746           }
2747         }
2748         wasAcr = false;
2749       }
2750     }
2751   }
2752
2753   public void recordComment(int token) {
2754     // compute position
2755     int stopPosition = this.currentPosition;
2756     switch (token) {
2757     case TokenNameCOMMENT_LINE:
2758       stopPosition = -this.lastCommentLinePosition;
2759       break;
2760     case TokenNameCOMMENT_BLOCK:
2761       stopPosition = -this.currentPosition;
2762       break;
2763     }
2764
2765     // a new comment is recorded
2766     int length = this.commentStops.length;
2767     if (++this.commentPtr >= length) {
2768       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2769       //grows the positions buffers too
2770       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2771     }
2772     this.commentStops[this.commentPtr] = stopPosition;
2773     this.commentStarts[this.commentPtr] = this.startPosition;
2774   }
2775
2776   //  public final void recordComment(boolean isJavadoc) {
2777   //    // a new annotation comment is recorded
2778   //    try {
2779   //      commentStops[++commentPtr] = isJavadoc
2780   //          ? currentPosition
2781   //          : -currentPosition;
2782   //    } catch (IndexOutOfBoundsException e) {
2783   //      int oldStackLength = commentStops.length;
2784   //      int[] oldStack = commentStops;
2785   //      commentStops = new int[oldStackLength + 30];
2786   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2787   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2788   //      //grows the positions buffers too
2789   //      int[] old = commentStarts;
2790   //      commentStarts = new int[oldStackLength + 30];
2791   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2792   //    }
2793   //    //the buffer is of a correct size here
2794   //    commentStarts[commentPtr] = startPosition;
2795   //  }
2796   public void resetTo(int begin, int end) {
2797     //reset the scanner to a given position where it may rescan again
2798     diet = false;
2799     initialPosition = startPosition = currentPosition = begin;
2800     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2801     commentPtr = -1; // reset comment stack
2802   }
2803
2804   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2805     // the string with "\\u" is a legal string of two chars \ and u
2806     //thus we use a direct access to the source (for regular cases).
2807     //    if (unicodeAsBackSlash) {
2808     //      // consume next character
2809     //      unicodeAsBackSlash = false;
2810     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2811     //        && (source[currentPosition] == 'u')) {
2812     //        getNextUnicodeChar();
2813     //      } else {
2814     //        if (withoutUnicodePtr != 0) {
2815     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2816     //        }
2817     //      }
2818     //    } else
2819     currentCharacter = source[currentPosition++];
2820     switch (currentCharacter) {
2821     case '\'':
2822       currentCharacter = '\'';
2823       break;
2824     case '\\':
2825       currentCharacter = '\\';
2826       break;
2827     default:
2828       currentCharacter = '\\';
2829       currentPosition--;
2830     }
2831   }
2832
2833   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2834     // the string with "\\u" is a legal string of two chars \ and u
2835     //thus we use a direct access to the source (for regular cases).
2836     //    if (unicodeAsBackSlash) {
2837     //      // consume next character
2838     //      unicodeAsBackSlash = false;
2839     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2840     //        && (source[currentPosition] == 'u')) {
2841     //        getNextUnicodeChar();
2842     //      } else {
2843     //        if (withoutUnicodePtr != 0) {
2844     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2845     //        }
2846     //      }
2847     //    } else
2848     currentCharacter = source[currentPosition++];
2849     switch (currentCharacter) {
2850     //      case 'b' :
2851     //        currentCharacter = '\b';
2852     //        break;
2853     case 't':
2854       currentCharacter = '\t';
2855       break;
2856     case 'n':
2857       currentCharacter = '\n';
2858       break;
2859     //      case 'f' :
2860     //        currentCharacter = '\f';
2861     //        break;
2862     case 'r':
2863       currentCharacter = '\r';
2864       break;
2865     case '\"':
2866       currentCharacter = '\"';
2867       break;
2868     case '\'':
2869       currentCharacter = '\'';
2870       break;
2871     case '\\':
2872       currentCharacter = '\\';
2873       break;
2874     case '$':
2875       currentCharacter = '$';
2876       break;
2877     default:
2878       // -----------octal escape--------------
2879       // OctalDigit
2880       // OctalDigit OctalDigit
2881       // ZeroToThree OctalDigit OctalDigit
2882       int number = Character.getNumericValue(currentCharacter);
2883       if (number >= 0 && number <= 7) {
2884         boolean zeroToThreeNot = number > 3;
2885         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2886           int digit = Character.getNumericValue(currentCharacter);
2887           if (digit >= 0 && digit <= 7) {
2888             number = (number * 8) + digit;
2889             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2890               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2891                 // Digit --> ignore last character
2892                 currentPosition--;
2893               } else {
2894                 digit = Character.getNumericValue(currentCharacter);
2895                 if (digit >= 0 && digit <= 7) {
2896                   // has read \ZeroToThree OctalDigit OctalDigit
2897                   number = (number * 8) + digit;
2898                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2899                   // --> ignore last character
2900                   currentPosition--;
2901                 }
2902               }
2903             } else { // has read \OctalDigit NonDigit--> ignore last
2904               // character
2905               currentPosition--;
2906             }
2907           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2908             // character
2909             currentPosition--;
2910           }
2911         } else { // has read \OctalDigit --> ignore last character
2912           currentPosition--;
2913         }
2914         if (number > 255)
2915           throw new InvalidInputException(INVALID_ESCAPE);
2916         currentCharacter = (char) number;
2917       }
2918     //else
2919     //     throw new InvalidInputException(INVALID_ESCAPE);
2920     }
2921   }
2922
2923   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2924   //    return scanIdentifierOrKeyword( false );
2925   //  }
2926   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2927     //test keywords
2928     //first dispatch on the first char.
2929     //then the length. If there are several
2930     //keywors with the same length AND the same first char, then do another
2931     //disptach on the second char :-)...cool....but fast !
2932     useAssertAsAnIndentifier = false;
2933     while (getNextCharAsJavaIdentifierPart()) {
2934     }
2935     ;
2936     if (isVariable) {
2937       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2938       //        return TokenNamethis;
2939       //      }
2940       return TokenNameVariable;
2941     }
2942     int index, length;
2943     char[] data;
2944     char firstLetter;
2945     //    if (withoutUnicodePtr == 0)
2946     //quick test on length == 1 but not on length > 12 while most identifier
2947     //have a length which is <= 12...but there are lots of identifier with
2948     //only one char....
2949     //      {
2950     if ((length = currentPosition - startPosition) == 1)
2951       return TokenNameIdentifier;
2952     //  data = source;
2953     data = new char[length];
2954     index = startPosition;
2955     for (int i = 0; i < length; i++) {
2956       data[i] = Character.toLowerCase(source[index + i]);
2957     }
2958     index = 0;
2959     //    } else {
2960     //      if ((length = withoutUnicodePtr) == 1)
2961     //        return TokenNameIdentifier;
2962     //      // data = withoutUnicodeBuffer;
2963     //      data = new char[withoutUnicodeBuffer.length];
2964     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2965     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2966     //      }
2967     //      index = 1;
2968     //    }
2969     firstLetter = data[index];
2970     switch (firstLetter) {
2971     case '_':
2972       switch (length) {
2973       case 8:
2974         //__FILE__
2975         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2976             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2977           return TokenNameFILE;
2978         index = 0; //__LINE__
2979         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2980             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2981           return TokenNameLINE;
2982         break;
2983       case 9:
2984         //__CLASS__
2985         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2986             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2987           return TokenNameCLASS_C;
2988         break;
2989       case 11:
2990         //__METHOD__
2991         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2992             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2993             && (data[++index] == '_'))
2994           return TokenNameMETHOD_C;
2995         break;
2996       case 12:
2997         //__FUNCTION__
2998         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2999             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3000             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3001           return TokenNameFUNC_C;
3002         break;
3003       }
3004       return TokenNameIdentifier;
3005     case 'a':
3006       // as and array abstract
3007       switch (length) {
3008       case 2:
3009         //as
3010         if ((data[++index] == 's')) {
3011           return TokenNameas;
3012         } else {
3013           return TokenNameIdentifier;
3014         }
3015       case 3:
3016         //and
3017         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3018           return TokenNameand;
3019         } else {
3020           return TokenNameIdentifier;
3021         }
3022       case 5:
3023         // array
3024         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3025           return TokenNamearray;
3026         else
3027           return TokenNameIdentifier;
3028       case 8:
3029         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3030             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3031           return TokenNameabstract;
3032         else
3033           return TokenNameIdentifier;
3034       default:
3035         return TokenNameIdentifier;
3036       }
3037     case 'b':
3038       //break
3039       switch (length) {
3040       case 5:
3041         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3042           return TokenNamebreak;
3043         else
3044           return TokenNameIdentifier;
3045       default:
3046         return TokenNameIdentifier;
3047       }
3048     case 'c':
3049       //case catch class clone const continue
3050       switch (length) {
3051       case 4:
3052         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3053           return TokenNamecase;
3054         else
3055           return TokenNameIdentifier;
3056       case 5:
3057         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3058           return TokenNamecatch;
3059         index = 0;
3060         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3061           return TokenNameclass;
3062         index = 0;
3063         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3064           return TokenNameclone;
3065         index = 0;
3066         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3067           return TokenNameconst;
3068         else
3069           return TokenNameIdentifier;
3070       case 8:
3071         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3072             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3073           return TokenNamecontinue;
3074         else
3075           return TokenNameIdentifier;
3076       default:
3077         return TokenNameIdentifier;
3078       }
3079     case 'd':
3080       // declare default do die
3081       // TODO delete define ==> no keyword !
3082       switch (length) {
3083       case 2:
3084         if ((data[++index] == 'o'))
3085           return TokenNamedo;
3086         else
3087           return TokenNameIdentifier;
3088       //          case 6 :
3089       //            if ((data[++index] == 'e')
3090       //              && (data[++index] == 'f')
3091       //              && (data[++index] == 'i')
3092       //              && (data[++index] == 'n')
3093       //              && (data[++index] == 'e'))
3094       //              return TokenNamedefine;
3095       //            else
3096       //              return TokenNameIdentifier;
3097       case 7:
3098         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3099             && (data[++index] == 'r') && (data[++index] == 'e'))
3100           return TokenNamedeclare;
3101         index = 0;
3102         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3103             && (data[++index] == 'l') && (data[++index] == 't'))
3104           return TokenNamedefault;
3105         else
3106           return TokenNameIdentifier;
3107       default:
3108         return TokenNameIdentifier;
3109       }
3110     case 'e':
3111       //echo else exit elseif extends eval
3112       switch (length) {
3113       case 4:
3114         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3115           return TokenNameecho;
3116         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3117           return TokenNameelse;
3118         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3119           return TokenNameexit;
3120         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3121           return TokenNameeval;
3122         else
3123           return TokenNameIdentifier;
3124       case 5:
3125         // endif empty
3126         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3127           return TokenNameendif;
3128         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3129           return TokenNameempty;
3130         else
3131           return TokenNameIdentifier;
3132       case 6:
3133         // endfor
3134         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3135             && (data[++index] == 'r'))
3136           return TokenNameendfor;
3137         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3138             && (data[++index] == 'f'))
3139           return TokenNameelseif;
3140         else
3141           return TokenNameIdentifier;
3142       case 7:
3143         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3144             && (data[++index] == 'd') && (data[++index] == 's'))
3145           return TokenNameextends;
3146         else
3147           return TokenNameIdentifier;
3148       case 8:
3149         // endwhile
3150         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3151             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3152           return TokenNameendwhile;
3153         else
3154           return TokenNameIdentifier;
3155       case 9:
3156         // endswitch
3157         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3158             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3159           return TokenNameendswitch;
3160         else
3161           return TokenNameIdentifier;
3162       case 10:
3163         // enddeclare
3164         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3165             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3166             && (data[++index] == 'e'))
3167           return TokenNameenddeclare;
3168         index = 0;
3169         if ((data[++index] == 'n') // endforeach
3170             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3171             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3172           return TokenNameendforeach;
3173         else
3174           return TokenNameIdentifier;
3175       default:
3176         return TokenNameIdentifier;
3177       }
3178     case 'f':
3179       //for false final function
3180       switch (length) {
3181       case 3:
3182         if ((data[++index] == 'o') && (data[++index] == 'r'))
3183           return TokenNamefor;
3184         else
3185           return TokenNameIdentifier;
3186       case 5:
3187         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3188         //                && (data[++index] == 's') && (data[++index] == 'e'))
3189         //              return TokenNamefalse;
3190         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3191           return TokenNamefinal;
3192         else
3193           return TokenNameIdentifier;
3194       case 7:
3195         // foreach
3196         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3197             && (data[++index] == 'c') && (data[++index] == 'h'))
3198           return TokenNameforeach;
3199         else
3200           return TokenNameIdentifier;
3201       case 8:
3202         // function
3203         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3204             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3205           return TokenNamefunction;
3206         else
3207           return TokenNameIdentifier;
3208       default:
3209         return TokenNameIdentifier;
3210       }
3211     case 'g':
3212       //global
3213       if (length == 6) {
3214         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3215             && (data[++index] == 'l')) {
3216           return TokenNameglobal;
3217         }
3218       }
3219       return TokenNameIdentifier;
3220     case 'i':
3221       //if int isset include include_once instanceof interface implements
3222       switch (length) {
3223       case 2:
3224         if (data[++index] == 'f')
3225           return TokenNameif;
3226         else
3227           return TokenNameIdentifier;
3228       //          case 3 :
3229       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3230       //              return TokenNameint;
3231       //            else
3232       //              return TokenNameIdentifier;
3233       case 5:
3234         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3235           return TokenNameisset;
3236         else
3237           return TokenNameIdentifier;
3238       case 7:
3239         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3240             && (data[++index] == 'd') && (data[++index] == 'e'))
3241           return TokenNameinclude;
3242         else
3243           return TokenNameIdentifier;
3244       case 9:
3245         // interface
3246         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3247             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3248           return TokenNameinterface;
3249         else
3250           return TokenNameIdentifier;
3251       case 10:
3252         // instanceof
3253         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3254             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3255             && (data[++index] == 'f'))
3256           return TokenNameinstanceof;
3257         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3258             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3259             && (data[++index] == 's'))
3260           return TokenNameimplements;
3261         else
3262           return TokenNameIdentifier;
3263       case 12:
3264         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3265             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3266             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3267           return TokenNameinclude_once;
3268         else
3269           return TokenNameIdentifier;
3270       default:
3271         return TokenNameIdentifier;
3272       }
3273     case 'l':
3274       //list
3275       if (length == 4) {
3276         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3277           return TokenNamelist;
3278         }
3279       }
3280       return TokenNameIdentifier;
3281     case 'n':
3282       // new null
3283       switch (length) {
3284       case 3:
3285         if ((data[++index] == 'e') && (data[++index] == 'w'))
3286           return TokenNamenew;
3287         else
3288           return TokenNameIdentifier;
3289       //          case 4 :
3290       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3291       //                && (data[++index] == 'l'))
3292       //              return TokenNamenull;
3293       //            else
3294       //              return TokenNameIdentifier;
3295       default:
3296         return TokenNameIdentifier;
3297       }
3298     case 'o':
3299       // or old_function
3300       if (length == 2) {
3301         if (data[++index] == 'r') {
3302           return TokenNameor;
3303         }
3304       }
3305       //        if (length == 12) {
3306       //          if ((data[++index] == 'l')
3307       //            && (data[++index] == 'd')
3308       //            && (data[++index] == '_')
3309       //            && (data[++index] == 'f')
3310       //            && (data[++index] == 'u')
3311       //            && (data[++index] == 'n')
3312       //            && (data[++index] == 'c')
3313       //            && (data[++index] == 't')
3314       //            && (data[++index] == 'i')
3315       //            && (data[++index] == 'o')
3316       //            && (data[++index] == 'n')) {
3317       //            return TokenNameold_function;
3318       //          }
3319       //        }
3320       return TokenNameIdentifier;
3321     case 'p':
3322       // print public private protected
3323       switch (length) {
3324       case 5:
3325         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3326           return TokenNameprint;
3327         } else
3328           return TokenNameIdentifier;
3329       case 6:
3330         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3331             && (data[++index] == 'c')) {
3332           return TokenNamepublic;
3333         } else
3334           return TokenNameIdentifier;
3335       case 7:
3336         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3337             && (data[++index] == 't') && (data[++index] == 'e')) {
3338           return TokenNameprivate;
3339         } else
3340           return TokenNameIdentifier;
3341       case 9:
3342         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3343             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3344           return TokenNameprotected;
3345         } else
3346           return TokenNameIdentifier;
3347       }
3348       return TokenNameIdentifier;
3349     case 'r':
3350       //return require require_once
3351       if (length == 6) {
3352         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3353             && (data[++index] == 'n')) {
3354           return TokenNamereturn;
3355         }
3356       } else if (length == 7) {
3357         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3358             && (data[++index] == 'r') && (data[++index] == 'e')) {
3359           return TokenNamerequire;
3360         }
3361       } else if (length == 12) {
3362         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3363             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3364             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3365           return TokenNamerequire_once;
3366         }
3367       } else
3368         return TokenNameIdentifier;
3369     case 's':
3370       //static switch
3371       switch (length) {
3372       case 6:
3373         if (data[++index] == 't')
3374           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3375             return TokenNamestatic;
3376           } else
3377             return TokenNameIdentifier;
3378         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3379             && (data[++index] == 'h'))
3380           return TokenNameswitch;
3381         else
3382           return TokenNameIdentifier;
3383       default:
3384         return TokenNameIdentifier;
3385       }
3386     case 't':
3387       // try true throw
3388       switch (length) {
3389       case 3:
3390         if ((data[++index] == 'r') && (data[++index] == 'y'))
3391           return TokenNametry;
3392         else
3393           return TokenNameIdentifier;
3394       //          case 4 :
3395       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3396       //                && (data[++index] == 'e'))
3397       //              return TokenNametrue;
3398       //            else
3399       //              return TokenNameIdentifier;
3400       case 5:
3401         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3402           return TokenNamethrow;
3403         else
3404           return TokenNameIdentifier;
3405       default:
3406         return TokenNameIdentifier;
3407       }
3408     case 'u':
3409       //use unset
3410       switch (length) {
3411       case 3:
3412         if ((data[++index] == 's') && (data[++index] == 'e'))
3413           return TokenNameuse;
3414         else
3415           return TokenNameIdentifier;
3416       case 5:
3417         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3418           return TokenNameunset;
3419         else
3420           return TokenNameIdentifier;
3421       default:
3422         return TokenNameIdentifier;
3423       }
3424     case 'v':
3425       //var
3426       switch (length) {
3427       case 3:
3428         if ((data[++index] == 'a') && (data[++index] == 'r'))
3429           return TokenNamevar;
3430         else
3431           return TokenNameIdentifier;
3432       default:
3433         return TokenNameIdentifier;
3434       }
3435     case 'w':
3436       //while
3437       switch (length) {
3438       case 5:
3439         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3440           return TokenNamewhile;
3441         else
3442           return TokenNameIdentifier;
3443       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3444       // (data[++index]=='e') && (data[++index]=='f')&&
3445       // (data[++index]=='p'))
3446       //return TokenNamewidefp ;
3447       //else
3448       //return TokenNameIdentifier;
3449       default:
3450         return TokenNameIdentifier;
3451       }
3452     case 'x':
3453       //xor
3454       switch (length) {
3455       case 3:
3456         if ((data[++index] == 'o') && (data[++index] == 'r'))
3457           return TokenNamexor;
3458         else
3459           return TokenNameIdentifier;
3460       default:
3461         return TokenNameIdentifier;
3462       }
3463     default:
3464       return TokenNameIdentifier;
3465     }
3466   }
3467
3468   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3469     //when entering this method the currentCharacter is the firt
3470     //digit of the number , i.e. it may be preceeded by a . when
3471     //dotPrefix is true
3472     boolean floating = dotPrefix;
3473     if ((!dotPrefix) && (currentCharacter == '0')) {
3474       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3475         //force the first char of the hexa number do exist...
3476         // consume next character
3477         unicodeAsBackSlash = false;
3478         currentCharacter = source[currentPosition++];
3479         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3480         //          && (source[currentPosition] == 'u')) {
3481         //          getNextUnicodeChar();
3482         //        } else {
3483         //          if (withoutUnicodePtr != 0) {
3484         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3485         //          }
3486         //        }
3487         if (Character.digit(currentCharacter, 16) == -1)
3488           throw new InvalidInputException(INVALID_HEXA);
3489         //---end forcing--
3490         while (getNextCharAsDigit(16)) {
3491         }
3492         ;
3493         //        if (getNextChar('l', 'L') >= 0)
3494         //          return TokenNameLongLiteral;
3495         //        else
3496         return TokenNameIntegerLiteral;
3497       }
3498       //there is x or X in the number
3499       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3500       // 00078.0 is true !!!!! crazy language
3501       if (getNextCharAsDigit()) {
3502         //-------------potential octal-----------------
3503         while (getNextCharAsDigit()) {
3504         }
3505         ;
3506         //        if (getNextChar('l', 'L') >= 0) {
3507         //          return TokenNameLongLiteral;
3508         //        }
3509         //
3510         //        if (getNextChar('f', 'F') >= 0) {
3511         //          return TokenNameFloatingPointLiteral;
3512         //        }
3513         if (getNextChar('d', 'D') >= 0) {
3514           return TokenNameDoubleLiteral;
3515         } else { //make the distinction between octal and float ....
3516           if (getNextChar('.')) { //bingo ! ....
3517             while (getNextCharAsDigit()) {
3518             }
3519             ;
3520             if (getNextChar('e', 'E') >= 0) {
3521               // consume next character
3522               unicodeAsBackSlash = false;
3523               currentCharacter = source[currentPosition++];
3524               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3525               //                && (source[currentPosition] == 'u')) {
3526               //                getNextUnicodeChar();
3527               //              } else {
3528               //                if (withoutUnicodePtr != 0) {
3529               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3530               //                }
3531               //              }
3532               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3533                 // consume next character
3534                 unicodeAsBackSlash = false;
3535                 currentCharacter = source[currentPosition++];
3536                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3537                 //                  && (source[currentPosition] == 'u')) {
3538                 //                  getNextUnicodeChar();
3539                 //                } else {
3540                 //                  if (withoutUnicodePtr != 0) {
3541                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3542                 //                      currentCharacter;
3543                 //                  }
3544                 //                }
3545               }
3546               if (!Character.isDigit(currentCharacter))
3547                 throw new InvalidInputException(INVALID_FLOAT);
3548               while (getNextCharAsDigit()) {
3549               }
3550               ;
3551             }
3552             //            if (getNextChar('f', 'F') >= 0)
3553             //              return TokenNameFloatingPointLiteral;
3554             getNextChar('d', 'D'); //jump over potential d or D
3555             return TokenNameDoubleLiteral;
3556           } else {
3557             return TokenNameIntegerLiteral;
3558           }
3559         }
3560       } else {
3561         /* carry on */
3562       }
3563     }
3564     while (getNextCharAsDigit()) {
3565     }
3566     ;
3567     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3568     //      return TokenNameLongLiteral;
3569     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3570       while (getNextCharAsDigit()) {
3571       }
3572       ;
3573       floating = true;
3574     }
3575     //if floating is true both exponant and suffix may be optional
3576     if (getNextChar('e', 'E') >= 0) {
3577       floating = true;
3578       // consume next character
3579       unicodeAsBackSlash = false;
3580       currentCharacter = source[currentPosition++];
3581       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3582       //        && (source[currentPosition] == 'u')) {
3583       //        getNextUnicodeChar();
3584       //      } else {
3585       //        if (withoutUnicodePtr != 0) {
3586       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3587       //        }
3588       //      }
3589       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3590         // next
3591         // character
3592         unicodeAsBackSlash = false;
3593         currentCharacter = source[currentPosition++];
3594         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3595         //          && (source[currentPosition] == 'u')) {
3596         //          getNextUnicodeChar();
3597         //        } else {
3598         //          if (withoutUnicodePtr != 0) {
3599         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3600         //          }
3601         //        }
3602       }
3603       if (!Character.isDigit(currentCharacter))
3604         throw new InvalidInputException(INVALID_FLOAT);
3605       while (getNextCharAsDigit()) {
3606       }
3607       ;
3608     }
3609     if (getNextChar('d', 'D') >= 0)
3610       return TokenNameDoubleLiteral;
3611     //    if (getNextChar('f', 'F') >= 0)
3612     //      return TokenNameFloatingPointLiteral;
3613     //the long flag has been tested before
3614     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3615   }
3616
3617   /**
3618    * Search the line number corresponding to a specific position
3619    *
3620    */
3621   public final int getLineNumber(int position) {
3622     if (lineEnds == null)
3623       return 1;
3624     int length = linePtr + 1;
3625     if (length == 0)
3626       return 1;
3627     int g = 0, d = length - 1;
3628     int m = 0;
3629     while (g <= d) {
3630       m = (g + d) / 2;
3631       if (position < lineEnds[m]) {
3632         d = m - 1;
3633       } else if (position > lineEnds[m]) {
3634         g = m + 1;
3635       } else {
3636         return m + 1;
3637       }
3638     }
3639     if (position < lineEnds[m]) {
3640       return m + 1;
3641     }
3642     return m + 2;
3643   }
3644
3645   public void setPHPMode(boolean mode) {
3646     phpMode = mode;
3647   }
3648
3649   public final void setSource(char[] source) {
3650     setSource(null, source);
3651   }
3652
3653   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3654     //the source-buffer is set to sourceString
3655     this.compilationUnit = compilationUnit;
3656     if (source == null) {
3657       this.source = new char[0];
3658     } else {
3659       this.source = source;
3660     }
3661     startPosition = -1;
3662     initialPosition = currentPosition = 0;
3663     containsAssertKeyword = false;
3664     withoutUnicodeBuffer = new char[this.source.length];
3665     encapsedStringStack = new Stack();
3666   }
3667
3668   public String toString() {
3669     if (startPosition == source.length)
3670       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3671     if (currentPosition > source.length)
3672       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3673     char front[] = new char[startPosition];
3674     System.arraycopy(source, 0, front, 0, startPosition);
3675     int middleLength = (currentPosition - 1) - startPosition + 1;
3676     char middle[];
3677     if (middleLength > -1) {
3678       middle = new char[middleLength];
3679       System.arraycopy(source, startPosition, middle, 0, middleLength);
3680     } else {
3681       middle = new char[0];
3682     }
3683     char end[] = new char[source.length - (currentPosition - 1)];
3684     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3685     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3686         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3687         + new String(end);
3688   }
3689
3690   public final String toStringAction(int act) {
3691     switch (act) {
3692     case TokenNameERROR:
3693       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3694     // //$NON-NLS-1$
3695     case TokenNameINLINE_HTML:
3696       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3697     case TokenNameIdentifier:
3698       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3699     case TokenNameVariable:
3700       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3701     case TokenNameabstract:
3702       return "abstract"; //$NON-NLS-1$
3703     case TokenNameand:
3704       return "AND"; //$NON-NLS-1$
3705     case TokenNamearray:
3706       return "array"; //$NON-NLS-1$
3707     case TokenNameas:
3708       return "as"; //$NON-NLS-1$
3709     case TokenNamebreak:
3710       return "break"; //$NON-NLS-1$
3711     case TokenNamecase:
3712       return "case"; //$NON-NLS-1$
3713     case TokenNameclass:
3714       return "class"; //$NON-NLS-1$
3715     case TokenNamecatch:
3716       return "catch"; //$NON-NLS-1$
3717     case TokenNameclone:
3718       //$NON-NLS-1$
3719       return "clone";
3720     case TokenNameconst:
3721       //$NON-NLS-1$
3722       return "const";
3723     case TokenNamecontinue:
3724       return "continue"; //$NON-NLS-1$
3725     case TokenNamedefault:
3726       return "default"; //$NON-NLS-1$
3727     //      case TokenNamedefine :
3728     //        return "define"; //$NON-NLS-1$
3729     case TokenNamedo:
3730       return "do"; //$NON-NLS-1$
3731     case TokenNameecho:
3732       return "echo"; //$NON-NLS-1$
3733     case TokenNameelse:
3734       return "else"; //$NON-NLS-1$
3735     case TokenNameelseif:
3736       return "elseif"; //$NON-NLS-1$
3737     case TokenNameendfor:
3738       return "endfor"; //$NON-NLS-1$
3739     case TokenNameendforeach:
3740       return "endforeach"; //$NON-NLS-1$
3741     case TokenNameendif:
3742       return "endif"; //$NON-NLS-1$
3743     case TokenNameendswitch:
3744       return "endswitch"; //$NON-NLS-1$
3745     case TokenNameendwhile:
3746       return "endwhile"; //$NON-NLS-1$
3747     case TokenNameexit:
3748       return "exit";
3749     case TokenNameextends:
3750       return "extends"; //$NON-NLS-1$
3751     //      case TokenNamefalse :
3752     //        return "false"; //$NON-NLS-1$
3753     case TokenNamefinal:
3754       return "final"; //$NON-NLS-1$
3755     case TokenNamefor:
3756       return "for"; //$NON-NLS-1$
3757     case TokenNameforeach:
3758       return "foreach"; //$NON-NLS-1$
3759     case TokenNamefunction:
3760       return "function"; //$NON-NLS-1$
3761     case TokenNameglobal:
3762       return "global"; //$NON-NLS-1$
3763     case TokenNameif:
3764       return "if"; //$NON-NLS-1$
3765     case TokenNameimplements:
3766       return "implements"; //$NON-NLS-1$
3767     case TokenNameinclude:
3768       return "include"; //$NON-NLS-1$
3769     case TokenNameinclude_once:
3770       return "include_once"; //$NON-NLS-1$
3771     case TokenNameinstanceof:
3772       return "instanceof"; //$NON-NLS-1$
3773     case TokenNameinterface:
3774       return "interface"; //$NON-NLS-1$
3775     case TokenNameisset:
3776       return "isset"; //$NON-NLS-1$
3777     case TokenNamelist:
3778       return "list"; //$NON-NLS-1$
3779     case TokenNamenew:
3780       return "new"; //$NON-NLS-1$
3781     //      case TokenNamenull :
3782     //        return "null"; //$NON-NLS-1$
3783     case TokenNameor:
3784       return "OR"; //$NON-NLS-1$
3785     case TokenNameprint:
3786       return "print"; //$NON-NLS-1$
3787     case TokenNameprivate:
3788       return "private"; //$NON-NLS-1$
3789     case TokenNameprotected:
3790       return "protected"; //$NON-NLS-1$
3791     case TokenNamepublic:
3792       return "public"; //$NON-NLS-1$
3793     case TokenNamerequire:
3794       return "require"; //$NON-NLS-1$
3795     case TokenNamerequire_once:
3796       return "require_once"; //$NON-NLS-1$
3797     case TokenNamereturn:
3798       return "return"; //$NON-NLS-1$
3799     case TokenNamestatic:
3800       return "static"; //$NON-NLS-1$
3801     case TokenNameswitch:
3802       return "switch"; //$NON-NLS-1$
3803     //      case TokenNametrue :
3804     //        return "true"; //$NON-NLS-1$
3805     case TokenNameunset:
3806       return "unset"; //$NON-NLS-1$
3807     case TokenNamevar:
3808       return "var"; //$NON-NLS-1$
3809     case TokenNamewhile:
3810       return "while"; //$NON-NLS-1$
3811     case TokenNamexor:
3812       return "XOR"; //$NON-NLS-1$
3813     //      case TokenNamethis :
3814     //        return "$this"; //$NON-NLS-1$
3815     case TokenNameIntegerLiteral:
3816       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3817     case TokenNameDoubleLiteral:
3818       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3819     case TokenNameStringDoubleQuote:
3820       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3821     case TokenNameStringSingleQuote:
3822       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3823     case TokenNameStringInterpolated:
3824       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3825     case TokenNameEncapsedString0:
3826       return "`"; //$NON-NLS-1$
3827     case TokenNameEncapsedString1:
3828       return "\'"; //$NON-NLS-1$
3829     case TokenNameEncapsedString2:
3830       return "\""; //$NON-NLS-1$
3831     case TokenNameSTRING:
3832       return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3833     case TokenNameHEREDOC:
3834       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3835     case TokenNamePLUS_PLUS:
3836       return "++"; //$NON-NLS-1$
3837     case TokenNameMINUS_MINUS:
3838       return "--"; //$NON-NLS-1$
3839     case TokenNameEQUAL_EQUAL:
3840       return "=="; //$NON-NLS-1$
3841     case TokenNameEQUAL_EQUAL_EQUAL:
3842       return "==="; //$NON-NLS-1$
3843     case TokenNameEQUAL_GREATER:
3844       return "=>"; //$NON-NLS-1$
3845     case TokenNameLESS_EQUAL:
3846       return "<="; //$NON-NLS-1$
3847     case TokenNameGREATER_EQUAL:
3848       return ">="; //$NON-NLS-1$
3849     case TokenNameNOT_EQUAL:
3850       return "!="; //$NON-NLS-1$
3851     case TokenNameNOT_EQUAL_EQUAL:
3852       return "!=="; //$NON-NLS-1$
3853     case TokenNameLEFT_SHIFT:
3854       return "<<"; //$NON-NLS-1$
3855     case TokenNameRIGHT_SHIFT:
3856       return ">>"; //$NON-NLS-1$
3857     case TokenNamePLUS_EQUAL:
3858       return "+="; //$NON-NLS-1$
3859     case TokenNameMINUS_EQUAL:
3860       return "-="; //$NON-NLS-1$
3861     case TokenNameMULTIPLY_EQUAL:
3862       return "*="; //$NON-NLS-1$
3863     case TokenNameDIVIDE_EQUAL:
3864       return "/="; //$NON-NLS-1$
3865     case TokenNameAND_EQUAL:
3866       return "&="; //$NON-NLS-1$
3867     case TokenNameOR_EQUAL:
3868       return "|="; //$NON-NLS-1$
3869     case TokenNameXOR_EQUAL:
3870       return "^="; //$NON-NLS-1$
3871     case TokenNameREMAINDER_EQUAL:
3872       return "%="; //$NON-NLS-1$
3873     case TokenNameDOT_EQUAL:
3874       return ".="; //$NON-NLS-1$
3875     case TokenNameLEFT_SHIFT_EQUAL:
3876       return "<<="; //$NON-NLS-1$
3877     case TokenNameRIGHT_SHIFT_EQUAL:
3878       return ">>="; //$NON-NLS-1$
3879     case TokenNameOR_OR:
3880       return "||"; //$NON-NLS-1$
3881     case TokenNameAND_AND:
3882       return "&&"; //$NON-NLS-1$
3883     case TokenNamePLUS:
3884       return "+"; //$NON-NLS-1$
3885     case TokenNameMINUS:
3886       return "-"; //$NON-NLS-1$
3887     case TokenNameMINUS_GREATER:
3888       return "->";
3889     case TokenNameNOT:
3890       return "!"; //$NON-NLS-1$
3891     case TokenNameREMAINDER:
3892       return "%"; //$NON-NLS-1$
3893     case TokenNameXOR:
3894       return "^"; //$NON-NLS-1$
3895     case TokenNameAND:
3896       return "&"; //$NON-NLS-1$
3897     case TokenNameMULTIPLY:
3898       return "*"; //$NON-NLS-1$
3899     case TokenNameOR:
3900       return "|"; //$NON-NLS-1$
3901     case TokenNameTWIDDLE:
3902       return "~"; //$NON-NLS-1$
3903     case TokenNameTWIDDLE_EQUAL:
3904       return "~="; //$NON-NLS-1$
3905     case TokenNameDIVIDE:
3906       return "/"; //$NON-NLS-1$
3907     case TokenNameGREATER:
3908       return ">"; //$NON-NLS-1$
3909     case TokenNameLESS:
3910       return "<"; //$NON-NLS-1$
3911     case TokenNameLPAREN:
3912       return "("; //$NON-NLS-1$
3913     case TokenNameRPAREN:
3914       return ")"; //$NON-NLS-1$
3915     case TokenNameLBRACE:
3916       return "{"; //$NON-NLS-1$
3917     case TokenNameRBRACE:
3918       return "}"; //$NON-NLS-1$
3919     case TokenNameLBRACKET:
3920       return "["; //$NON-NLS-1$
3921     case TokenNameRBRACKET:
3922       return "]"; //$NON-NLS-1$
3923     case TokenNameSEMICOLON:
3924       return ";"; //$NON-NLS-1$
3925     case TokenNameQUESTION:
3926       return "?"; //$NON-NLS-1$
3927     case TokenNameCOLON:
3928       return ":"; //$NON-NLS-1$
3929     case TokenNameCOMMA:
3930       return ","; //$NON-NLS-1$
3931     case TokenNameDOT:
3932       return "."; //$NON-NLS-1$
3933     case TokenNameEQUAL:
3934       return "="; //$NON-NLS-1$
3935     case TokenNameAT:
3936       return "@";
3937     case TokenNameDOLLAR:
3938       return "$";
3939     case TokenNameDOLLAR_LBRACE:
3940       return "${";
3941     case TokenNameLBRACE_DOLLAR:
3942       return "{$";
3943     case TokenNameEOF:
3944       return "EOF"; //$NON-NLS-1$
3945     case TokenNameWHITESPACE:
3946       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3947     case TokenNameCOMMENT_LINE:
3948       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3949     case TokenNameCOMMENT_BLOCK:
3950       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3951     case TokenNameCOMMENT_PHPDOC:
3952       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3953     //      case TokenNameHTML :
3954     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3955     // //$NON-NLS-1$
3956     case TokenNameFILE:
3957       return "__FILE__"; //$NON-NLS-1$
3958     case TokenNameLINE:
3959       return "__LINE__"; //$NON-NLS-1$
3960     case TokenNameCLASS_C:
3961       return "__CLASS__"; //$NON-NLS-1$
3962     case TokenNameMETHOD_C:
3963       return "__METHOD__"; //$NON-NLS-1$
3964     case TokenNameFUNC_C:
3965       return "__FUNCTION__"; //$NON-NLS-1
3966     case TokenNameboolCAST:
3967       return "( bool )"; //$NON-NLS-1$
3968     case TokenNameintCAST:
3969       return "( int )"; //$NON-NLS-1$
3970     case TokenNamedoubleCAST:
3971       return "( double )"; //$NON-NLS-1$
3972     case TokenNameobjectCAST:
3973       return "( object )"; //$NON-NLS-1$
3974     case TokenNamestringCAST:
3975       return "( string )"; //$NON-NLS-1$
3976     default:
3977       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3978     }
3979   }
3980
3981   public Scanner() {
3982     this(false, false);
3983   }
3984
3985   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3986     this(tokenizeComments, tokenizeWhiteSpace, false);
3987   }
3988
3989   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3990     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3991   }
3992
3993   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
3994       boolean assertMode) {
3995     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null,true);
3996   }
3997
3998   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3999       boolean checkNonExternalizedStringLiterals,
4000       boolean assertMode, boolean tokenizeStrings,
4001       char[][] taskTags,
4002       char[][] taskPriorities,
4003       boolean isTaskCaseSensitive) {
4004     this.eofPosition = Integer.MAX_VALUE;
4005     this.tokenizeComments = tokenizeComments;
4006     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4007     this.tokenizeStrings = tokenizeStrings;
4008     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4009     this.assertMode = assertMode;
4010     this.encapsedStringStack = null;
4011     this.taskTags = taskTags;
4012     this.taskPriorities = taskPriorities;
4013   }
4014
4015   private void checkNonExternalizeString() throws InvalidInputException {
4016     if (currentLine == null)
4017       return;
4018     parseTags(currentLine);
4019   }
4020
4021   private void parseTags(NLSLine line) throws InvalidInputException {
4022     String s = new String(getCurrentTokenSource());
4023     int pos = s.indexOf(TAG_PREFIX);
4024     int lineLength = line.size();
4025     while (pos != -1) {
4026       int start = pos + TAG_PREFIX_LENGTH;
4027       int end = s.indexOf(TAG_POSTFIX, start);
4028       String index = s.substring(start, end);
4029       int i = 0;
4030       try {
4031         i = Integer.parseInt(index) - 1;
4032         // Tags are one based not zero based.
4033       } catch (NumberFormatException e) {
4034         i = -1; // we don't want to consider this as a valid NLS tag
4035       }
4036       if (line.exists(i)) {
4037         line.set(i, null);
4038       }
4039       pos = s.indexOf(TAG_PREFIX, start);
4040     }
4041     this.nonNLSStrings = new StringLiteral[lineLength];
4042     int nonNLSCounter = 0;
4043     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4044       StringLiteral literal = (StringLiteral) iterator.next();
4045       if (literal != null) {
4046         this.nonNLSStrings[nonNLSCounter++] = literal;
4047       }
4048     }
4049     if (nonNLSCounter == 0) {
4050       this.nonNLSStrings = null;
4051       currentLine = null;
4052       return;
4053     }
4054     this.wasNonExternalizedStringLiteral = true;
4055     if (nonNLSCounter != lineLength) {
4056       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4057     }
4058     currentLine = null;
4059   }
4060
4061   public final void scanEscapeCharacter() throws InvalidInputException {
4062     // the string with "\\u" is a legal string of two chars \ and u
4063     //thus we use a direct access to the source (for regular cases).
4064     if (unicodeAsBackSlash) {
4065       // consume next character
4066       unicodeAsBackSlash = false;
4067       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
4068       // (source[currentPosition] == 'u')) {
4069       //                                getNextUnicodeChar();
4070       //                        } else {
4071       if (withoutUnicodePtr != 0) {
4072         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4073         //                              }
4074       }
4075     } else
4076       currentCharacter = source[currentPosition++];
4077     switch (currentCharacter) {
4078     case 'b':
4079       currentCharacter = '\b';
4080       break;
4081     case 't':
4082       currentCharacter = '\t';
4083       break;
4084     case 'n':
4085       currentCharacter = '\n';
4086       break;
4087     case 'f':
4088       currentCharacter = '\f';
4089       break;
4090     case 'r':
4091       currentCharacter = '\r';
4092       break;
4093     case '\"':
4094       currentCharacter = '\"';
4095       break;
4096     case '\'':
4097       currentCharacter = '\'';
4098       break;
4099     case '\\':
4100       currentCharacter = '\\';
4101       break;
4102     default:
4103       // -----------octal escape--------------
4104       // OctalDigit
4105       // OctalDigit OctalDigit
4106       // ZeroToThree OctalDigit OctalDigit
4107       int number = Character.getNumericValue(currentCharacter);
4108       if (number >= 0 && number <= 7) {
4109         boolean zeroToThreeNot = number > 3;
4110         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4111           int digit = Character.getNumericValue(currentCharacter);
4112           if (digit >= 0 && digit <= 7) {
4113             number = (number * 8) + digit;
4114             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4115               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4116                 // Digit --> ignore last character
4117                 currentPosition--;
4118               } else {
4119                 digit = Character.getNumericValue(currentCharacter);
4120                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4121                   // OctalDigit OctalDigit
4122                   number = (number * 8) + digit;
4123                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4124                   // --> ignore last character
4125                   currentPosition--;
4126                 }
4127               }
4128             } else { // has read \OctalDigit NonDigit--> ignore last
4129               // character
4130               currentPosition--;
4131             }
4132           } else { // has read \OctalDigit NonOctalDigit--> ignore last
4133             // character
4134             currentPosition--;
4135           }
4136         } else { // has read \OctalDigit --> ignore last character
4137           currentPosition--;
4138         }
4139         if (number > 255)
4140           throw new InvalidInputException(INVALID_ESCAPE);
4141         currentCharacter = (char) number;
4142       } else
4143         throw new InvalidInputException(INVALID_ESCAPE);
4144     }
4145   }
4146
4147 //chech presence of task: tags
4148 //TODO (frederic) see if we need to take unicode characters into account...
4149 public void checkTaskTag(int commentStart, int commentEnd) {
4150         char[] src = this.source;
4151
4152         // only look for newer task: tags
4153         if (this.foundTaskCount > 0
4154                 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4155                 return;
4156         }
4157         int foundTaskIndex = this.foundTaskCount;
4158         char previous = src[commentStart+1]; // should be '*' or '/'
4159         nextChar : for (
4160                 int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4161                 char[] tag = null;
4162                 char[] priority = null;
4163                 // check for tag occurrence only if not ambiguous with javadoc tag
4164                 if (previous != '@') {
4165                         nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4166                                 tag = this.taskTags[itag];
4167                                 int tagLength = tag.length;
4168                                 if (tagLength == 0) continue nextTag;
4169
4170                                 // ensure tag is not leaded with letter if tag starts with a letter
4171                                 if (Character.isJavaIdentifierStart(tag[0])) {
4172                                         if (Character.isJavaIdentifierPart(previous)) {
4173                                                 continue nextTag;
4174                                         }
4175                                 }
4176
4177                                 for (int t = 0; t < tagLength; t++) {
4178                                         char sc, tc;
4179                                         int x = i+t;
4180                                         if (x >= this.eofPosition || x >= commentEnd) continue nextTag;
4181                                         if ((sc = src[i + t]) != (tc = tag[t])) {                                                                                                                                                                       // case sensitive check
4182                                                 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) {     // case insensitive check
4183                                                         continue nextTag;
4184                                                 }
4185                                         }
4186                                 }
4187                                 // ensure tag is not followed with letter if tag finishes with a letter
4188                                 if (i+tagLength < commentEnd && Character.isJavaIdentifierPart(src[i+tagLength-1])) {
4189                                         if (Character.isJavaIdentifierPart(src[i + tagLength]))
4190                                                 continue nextTag;
4191                                 }
4192                                 if (this.foundTaskTags == null) {
4193                                         this.foundTaskTags = new char[5][];
4194                                         this.foundTaskMessages = new char[5][];
4195                                         this.foundTaskPriorities = new char[5][];
4196                                         this.foundTaskPositions = new int[5][];
4197                                 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4198                                         System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4199                                         System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4200                                         System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4201                                         System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4202                                 }
4203
4204                                 priority = this.taskPriorities != null && itag < this.taskPriorities.length
4205                                                         ? this.taskPriorities[itag]
4206                                                         : null;
4207
4208                                 this.foundTaskTags[this.foundTaskCount] = tag;
4209                                 this.foundTaskPriorities[this.foundTaskCount] = priority;
4210                                 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4211                                 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4212                                 this.foundTaskCount++;
4213                                 i += tagLength - 1; // will be incremented when looping
4214                                 break nextTag;
4215                         }
4216                 }
4217                 previous = src[i];
4218         }
4219         for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4220                 // retrieve message start and end positions
4221                 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4222                 int max_value = i + 1 < this.foundTaskCount
4223                                 ? this.foundTaskPositions[i + 1][0] - 1
4224                                 : commentEnd - 1;
4225                 // at most beginning of next task
4226                 if (max_value < msgStart) {
4227                         max_value = msgStart; // would only occur if tag is before EOF.
4228                 }
4229                 int end = -1;
4230                 char c;
4231                 for (int j = msgStart; j < max_value; j++) {
4232                         if ((c = src[j]) == '\n' || c == '\r') {
4233                                 end = j - 1;
4234                                 break;
4235                         }
4236                 }
4237                 if (end == -1) {
4238                         for (int j = max_value; j > msgStart; j--) {
4239                                 if ((c = src[j]) == '*') {
4240                                         end = j - 1;
4241                                         break;
4242                                 }
4243                         }
4244                         if (end == -1)
4245                                 end = max_value;
4246                 }
4247                 if (msgStart == end)
4248                         continue; // empty
4249                 // trim the message
4250                 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4251                         end--;
4252                 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4253                         msgStart++;
4254                 // update the end position of the task
4255                 this.foundTaskPositions[i][1] = end;
4256                 // get the message source
4257                 final int messageLength = end - msgStart + 1;
4258                 char[] message = new char[messageLength];
4259                 System.arraycopy(src, msgStart, message, 0, messageLength);
4260                 this.foundTaskMessages[i] = message;
4261         }
4262 }
4263
4264   // check presence of task: tags
4265 //  public void checkTaskTag(int commentStart, int commentEnd) {
4266 //    // only look for newer task: tags
4267 //    if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4268 //      return;
4269 //    }
4270 //    int foundTaskIndex = this.foundTaskCount;
4271 //    nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4272 //      char[] tag = null;
4273 //      char[] priority = null;
4274 //      // check for tag occurrence
4275 //      nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4276 //        tag = this.taskTags[itag];
4277 //        priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4278 //        int tagLength = tag.length;
4279 //        for (int t = 0; t < tagLength; t++) {
4280 //          if (this.source[i + t] != tag[t])
4281 //            continue nextTag;
4282 //        }
4283 //        if (this.foundTaskTags == null) {
4284 //          this.foundTaskTags = new char[5][];
4285 //          this.foundTaskMessages = new char[5][];
4286 //          this.foundTaskPriorities = new char[5][];
4287 //          this.foundTaskPositions = new int[5][];
4288 //        } else if (this.foundTaskCount == this.foundTaskTags.length) {
4289 //          System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4290 //          System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4291 //              this.foundTaskCount);
4292 //          System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4293 //              this.foundTaskCount);
4294 //          System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4295 //              this.foundTaskCount);
4296 //        }
4297 //        this.foundTaskTags[this.foundTaskCount] = tag;
4298 //        this.foundTaskPriorities[this.foundTaskCount] = priority;
4299 //        this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4300 //        this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4301 //        this.foundTaskCount++;
4302 //        i += tagLength - 1; // will be incremented when looping
4303 //      }
4304 //    }
4305 //    for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4306 //      // retrieve message start and end positions
4307 //      int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4308 //      int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4309 //      // at most beginning of next task
4310 //      if (max_value < msgStart)
4311 //        max_value = msgStart; // would only occur if tag is before EOF.
4312 //      int end = -1;
4313 //      char c;
4314 //      for (int j = msgStart; j < max_value; j++) {
4315 //        if ((c = this.source[j]) == '\n' || c == '\r') {
4316 //          end = j - 1;
4317 //          break;
4318 //        }
4319 //      }
4320 //      if (end == -1) {
4321 //        for (int j = max_value; j > msgStart; j--) {
4322 //          if ((c = this.source[j]) == '*') {
4323 //            end = j - 1;
4324 //            break;
4325 //          }
4326 //        }
4327 //        if (end == -1)
4328 //          end = max_value;
4329 //      }
4330 //      if (msgStart == end)
4331 //        continue; // empty
4332 //      // trim the message
4333 //      while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4334 //        end--;
4335 //      while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4336 //        msgStart++;
4337 //      // update the end position of the task
4338 //      this.foundTaskPositions[i][1] = end;
4339 //      // get the message source
4340 //      final int messageLength = end - msgStart + 1;
4341 //      char[] message = new char[messageLength];
4342 //      System.arraycopy(source, msgStart, message, 0, messageLength);
4343 //      this.foundTaskMessages[i] = message;
4344 //    }
4345 //  }
4346 }