net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The APIs are:
   * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into
   *   the correct char);
   * - startPosition, which gives the sourceStart position in the stream;
   * - currentPosition-1, which gives the sourceEnd position in the stream.
   */
  28   // 1.4 feature
  29   private boolean assertMode;
  30
  31   public boolean useAssertAsAnIndentifier = false;
  32
  33   //flag indicating if processed source contains occurrences of keyword assert
  34   public boolean containsAssertKeyword = false;
  35
  36   public boolean recordLineSeparator;
  37
  38   public boolean ignorePHPOneLiner = false;
  39
  40   public boolean phpMode = false;
  41
  42   public Stack encapsedStringStack = null;
  43
  44   public char currentCharacter;
  45
  46   public int startPosition;
  47
  48   public int currentPosition;
  49
  50   public int initialPosition, eofPosition;
  51
  52   // after this position eof are generated instead of real token from the
  53   // source
  54   public boolean tokenizeComments;
  55
  56   public boolean tokenizeWhiteSpace;
  57
  58   public boolean tokenizeStrings;
  59
  60   //source should be viewed as a window (aka a part)
  61   //of a entire very large stream
  62   public char source[];
  63
  64   //unicode support
  65   public char[] withoutUnicodeBuffer;
  66
  67   public int withoutUnicodePtr;
  68
  69   //when == 0 ==> no unicode in the current token
  70   public boolean unicodeAsBackSlash = false;
  71
  72   public boolean scanningFloatLiteral = false;
  73
  74   //support for /** comments
  75   public int[] commentStops = new int[10];
  76
  77   public int[] commentStarts = new int[10];
  78
  79   public int commentPtr = -1; // no comment test with commentPtr value -1
  80
  81   protected int lastCommentLinePosition = -1;
  82
  83   //diet parsing support - jump over some method body when requested
  84   public boolean diet = false;
  85
  86   //support for the poor-line-debuggers ....
  87   //remember the position of the cr/lf
  88   public int[] lineEnds = new int[250];
  89
  90   public int linePtr = -1;
  91
  92   public boolean wasAcr = false;
  93
  94   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  95
  96   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  97
  98   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  99
 100   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 101
 102   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 103
 104   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 105
 106   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 107
 108   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 109
 110   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 111
 112   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 113
 114   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 115
 116   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 117
  //----------------optimized identifier management------------------
 119   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 120       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 121       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 122       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 123       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 124       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 125       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 126       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 127       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 128
 129   static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
 130       '$',
 131       'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
 132       charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
 133       charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
 134       charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
 135       charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
 136       charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
 137       charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
 138       charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
 139
 140   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 141
 142   static final int TableSize = 30, InternalTableSize = 6;
 143
 144   //30*6 = 180 entries
 145   public static final int OptimizedLength = 6;
 146
 147   public/* static */
 148   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 149
 150   // support for detecting non-externalized string literals
 151   int currentLineNr = -1;
 152
 153   int previousLineNr = -1;
 154
 155   NLSLine currentLine = null;
 156
 157   List lines = new ArrayList();
 158
 159   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 160
 161   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 162
 163   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 164
 165   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 166
 167   public StringLiteral[] nonNLSStrings = null;
 168
 169   public boolean checkNonExternalizedStringLiterals = true;
 170
 171   public boolean wasNonExternalizedStringLiteral = false;
 172   /* static */{
 173     for (int i = 0; i < 6; i++) {
 174       for (int j = 0; j < TableSize; j++) {
 175         for (int k = 0; k < InternalTableSize; k++) {
 176           charArray_length[i][j][k] = initCharArray;
 177         }
 178       }
 179     }
 180   }
 181
 182   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 183
 184   public static final int RoundBracket = 0;
 185
 186   public static final int SquareBracket = 1;
 187
 188   public static final int CurlyBracket = 2;
 189
 190   public static final int BracketKinds = 3;
 191
 192   // task tag support
 193   public char[][] foundTaskTags = null;
 194
 195   public char[][] foundTaskMessages;
 196
 197   public char[][] foundTaskPriorities = null;
 198
 199   public int[][] foundTaskPositions;
 200
 201   public int foundTaskCount = 0;
 202
 203   public char[][] taskTags = null;
 204
 205   public char[][] taskPriorities = null;
 206
 207   public boolean isTaskCaseSensitive = true;
 208
 209   public static final boolean DEBUG = false;
 210
 211   public static final boolean TRACE = false;
 212
 213   public ICompilationUnit compilationUnit = null;
 214
 215   /**
 216    * Determines if the specified character is permissible as the first character in a PHP identifier.
 217    *
 218    * The '$' character for HP variables isn't regarded as the first character !
 219    */
 220   public static boolean isPHPIdentifierStart(char ch) {
 221     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 222   }
 223
 224   /**
 225    * Determines if the specified character may be part of a PHP identifier as other than the first character
 226    */
 227   public static boolean isPHPIdentifierPart(char ch) {
 228     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 229   }
 230
 231   public final boolean atEnd() {
 232     // This code is not relevant if source is
 233     // Only a part of the real stream input
 234     return source.length == currentPosition;
 235   }
 236
 237   public char[] getCurrentIdentifierSource() {
 238     //return the token REAL source (aka unicodes are precomputed)
 239     char[] result;
 240     //    if (withoutUnicodePtr != 0)
 241     //      //0 is used as a fast test flag so the real first char is in position 1
 242     //      System.arraycopy(
 243     //        withoutUnicodeBuffer,
 244     //        1,
 245     //        result = new char[withoutUnicodePtr],
 246     //        0,
 247     //        withoutUnicodePtr);
 248     //    else {
 249     int length = currentPosition - startPosition;
 250     switch (length) { // see OptimizedLength
 251     case 1:
 252       return optimizedCurrentTokenSource1();
 253     case 2:
 254       return optimizedCurrentTokenSource2();
 255     case 3:
 256       return optimizedCurrentTokenSource3();
 257     case 4:
 258       return optimizedCurrentTokenSource4();
 259     case 5:
 260       return optimizedCurrentTokenSource5();
 261     case 6:
 262       return optimizedCurrentTokenSource6();
 263     }
 264     //no optimization
 265     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 266     //   }
 267     return result;
 268   }
 269
 270   public int getCurrentTokenEndPosition() {
 271     return this.currentPosition - 1;
 272   }
 273
 274   public final char[] getCurrentTokenSource() {
 275     // Return the token REAL source (aka unicodes are precomputed)
 276     char[] result;
 277     //    if (withoutUnicodePtr != 0)
 278     //      // 0 is used as a fast test flag so the real first char is in position 1
 279     //      System.arraycopy(
 280     //        withoutUnicodeBuffer,
 281     //        1,
 282     //        result = new char[withoutUnicodePtr],
 283     //        0,
 284     //        withoutUnicodePtr);
 285     //    else {
 286     int length;
 287     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 288     //    }
 289     return result;
 290   }
 291
 292   public final char[] getCurrentTokenSource(int startPos) {
 293     // Return the token REAL source (aka unicodes are precomputed)
 294     char[] result;
 295     //    if (withoutUnicodePtr != 0)
 296     //      // 0 is used as a fast test flag so the real first char is in position 1
 297     //      System.arraycopy(
 298     //        withoutUnicodeBuffer,
 299     //        1,
 300     //        result = new char[withoutUnicodePtr],
 301     //        0,
 302     //        withoutUnicodePtr);
 303     //    else {
 304     int length;
 305     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 306     //  }
 307     return result;
 308   }
 309
 310   public final char[] getCurrentTokenSourceString() {
 311     //return the token REAL source (aka unicodes are precomputed).
 312     //REMOVE the two " that are at the beginning and the end.
 313     char[] result;
 314     if (withoutUnicodePtr != 0)
 315       //0 is used as a fast test flag so the real first char is in position 1
 316       System.arraycopy(withoutUnicodeBuffer, 2,
 317       //2 is 1 (real start) + 1 (to jump over the ")
 318           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 319     else {
 320       int length;
 321       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 322     }
 323     return result;
 324   }
 325
 326   public int getCurrentTokenStartPosition() {
 327     return this.startPosition;
 328   }
 329
 330   public final char[] getCurrentStringLiteralSource() {
 331     // Return the token REAL source (aka unicodes are precomputed)
 332     if (startPosition + 1 >= currentPosition) {
 333       return new char[0];
 334     }
 335     char[] result;
 336     int length;
 337     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 338     //    }
 339     return result;
 340   }
 341
 342   public final char[] getCurrentStringLiteralSource(int startPos) {
 343     // Return the token REAL source (aka unicodes are precomputed)
 344     char[] result;
 345     int length;
 346     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 347     //    }
 348     return result;
 349   }
 350
 351   /*
 352    * Search the source position corresponding to the end of a given line number
 353    *
 354    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 355    *
 356    * In case the given line number is inconsistent, answers -1.
 357    */
 358   public final int getLineEnd(int lineNumber) {
 359     if (lineEnds == null)
 360       return -1;
 361     if (lineNumber >= lineEnds.length)
 362       return -1;
 363     if (lineNumber <= 0)
 364       return -1;
 365     if (lineNumber == lineEnds.length - 1)
 366       return eofPosition;
 367     return lineEnds[lineNumber - 1];
 368     // next line start one character behind the lineEnd of the previous line
 369   }
 370
 371   /**
 372    * Search the source position corresponding to the beginning of a given line number
 373    *
 374    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 375    *
 376    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 377    *
 378    * In case the given line number is inconsistent, answers -1.
 379    */
 380   public final int getLineStart(int lineNumber) {
 381     if (lineEnds == null)
 382       return -1;
 383     if (lineNumber >= lineEnds.length)
 384       return -1;
 385     if (lineNumber <= 0)
 386       return -1;
 387     if (lineNumber == 1)
 388       return initialPosition;
 389     return lineEnds[lineNumber - 2] + 1;
 390     // next line start one character behind the lineEnd of the previous line
 391   }
 392
 393   public final boolean getNextChar(char testedChar) {
 394     //BOOLEAN
 395     //handle the case of unicode.
 396     //when a unicode appears then we must use a buffer that holds char
 397     // internal values
 398     //At the end of this method currentCharacter holds the new visited char
 399     //and currentPosition points right next after it
 400     //Both previous lines are true if the currentCharacter is == to the
 401     // testedChar
 402     //On false, no side effect has occured.
 403     //ALL getNextChar.... ARE OPTIMIZED COPIES
 404     int temp = currentPosition;
 405     try {
 406       currentCharacter = source[currentPosition++];
 407       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 408       //        && (source[currentPosition] == 'u')) {
 409       //        //-------------unicode traitement ------------
 410       //        int c1, c2, c3, c4;
 411       //        int unicodeSize = 6;
 412       //        currentPosition++;
 413       //        while (source[currentPosition] == 'u') {
 414       //          currentPosition++;
 415       //          unicodeSize++;
 416       //        }
 417       //
 418       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 419       //          || c1 < 0)
 420       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 421       //            || c2 < 0)
 422       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 423       //            || c3 < 0)
 424       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 425       //            || c4 < 0)) {
 426       //          currentPosition = temp;
 427       //          return false;
 428       //        }
 429       //
 430       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 431       //        if (currentCharacter != testedChar) {
 432       //          currentPosition = temp;
 433       //          return false;
 434       //        }
 435       //        unicodeAsBackSlash = currentCharacter == '\\';
 436       //
 437       //        //need the unicode buffer
 438       //        if (withoutUnicodePtr == 0) {
 439       //          //buffer all the entries that have been left aside....
 440       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 441       //          System.arraycopy(
 442       //            source,
 443       //            startPosition,
 444       //            withoutUnicodeBuffer,
 445       //            1,
 446       //            withoutUnicodePtr);
 447       //        }
 448       //        //fill the buffer with the char
 449       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 450       //        return true;
 451       //
 452       //      } //-------------end unicode traitement--------------
 453       //      else {
 454       if (currentCharacter != testedChar) {
 455         currentPosition = temp;
 456         return false;
 457       }
 458       unicodeAsBackSlash = false;
 459       //        if (withoutUnicodePtr != 0)
 460       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 461       return true;
 462       //      }
 463     } catch (IndexOutOfBoundsException e) {
 464       unicodeAsBackSlash = false;
 465       currentPosition = temp;
 466       return false;
 467     }
 468   }
 469
 470   public final int getNextChar(char testedChar1, char testedChar2) {
 471     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 472     //test can be done with (x==0) for the first and (x>0) for the second
 473     //handle the case of unicode.
 474     //when a unicode appears then we must use a buffer that holds char
 475     // internal values
 476     //At the end of this method currentCharacter holds the new visited char
 477     //and currentPosition points right next after it
 478     //Both previous lines are true if the currentCharacter is == to the
 479     // testedChar1/2
 480     //On false, no side effect has occured.
 481     //ALL getNextChar.... ARE OPTIMIZED COPIES
 482     int temp = currentPosition;
 483     try {
 484       int result;
 485       currentCharacter = source[currentPosition++];
 486       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 487       //        && (source[currentPosition] == 'u')) {
 488       //        //-------------unicode traitement ------------
 489       //        int c1, c2, c3, c4;
 490       //        int unicodeSize = 6;
 491       //        currentPosition++;
 492       //        while (source[currentPosition] == 'u') {
 493       //          currentPosition++;
 494       //          unicodeSize++;
 495       //        }
 496       //
 497       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 498       //          || c1 < 0)
 499       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 500       //            || c2 < 0)
 501       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 502       //            || c3 < 0)
 503       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 504       //            || c4 < 0)) {
 505       //          currentPosition = temp;
 506       //          return 2;
 507       //        }
 508       //
 509       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 510       //        if (currentCharacter == testedChar1)
 511       //          result = 0;
 512       //        else if (currentCharacter == testedChar2)
 513       //          result = 1;
 514       //        else {
 515       //          currentPosition = temp;
 516       //          return -1;
 517       //        }
 518       //
 519       //        //need the unicode buffer
 520       //        if (withoutUnicodePtr == 0) {
 521       //          //buffer all the entries that have been left aside....
 522       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 523       //          System.arraycopy(
 524       //            source,
 525       //            startPosition,
 526       //            withoutUnicodeBuffer,
 527       //            1,
 528       //            withoutUnicodePtr);
 529       //        }
 530       //        //fill the buffer with the char
 531       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 532       //        return result;
 533       //      } //-------------end unicode traitement--------------
 534       //      else {
 535       if (currentCharacter == testedChar1)
 536         result = 0;
 537       else if (currentCharacter == testedChar2)
 538         result = 1;
 539       else {
 540         currentPosition = temp;
 541         return -1;
 542       }
 543       //        if (withoutUnicodePtr != 0)
 544       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 545       return result;
 546       //     }
 547     } catch (IndexOutOfBoundsException e) {
 548       currentPosition = temp;
 549       return -1;
 550     }
 551   }
 552
 553   public final boolean getNextCharAsDigit() {
 554     //BOOLEAN
 555     //handle the case of unicode.
 556     //when a unicode appears then we must use a buffer that holds char
 557     // internal values
 558     //At the end of this method currentCharacter holds the new visited char
 559     //and currentPosition points right next after it
 560     //Both previous lines are true if the currentCharacter is a digit
 561     //On false, no side effect has occured.
 562     //ALL getNextChar.... ARE OPTIMIZED COPIES
 563     int temp = currentPosition;
 564     try {
 565       currentCharacter = source[currentPosition++];
 566       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 567       //        && (source[currentPosition] == 'u')) {
 568       //        //-------------unicode traitement ------------
 569       //        int c1, c2, c3, c4;
 570       //        int unicodeSize = 6;
 571       //        currentPosition++;
 572       //        while (source[currentPosition] == 'u') {
 573       //          currentPosition++;
 574       //          unicodeSize++;
 575       //        }
 576       //
 577       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 578       //          || c1 < 0)
 579       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 580       //            || c2 < 0)
 581       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 582       //            || c3 < 0)
 583       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 584       //            || c4 < 0)) {
 585       //          currentPosition = temp;
 586       //          return false;
 587       //        }
 588       //
 589       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 590       //        if (!Character.isDigit(currentCharacter)) {
 591       //          currentPosition = temp;
 592       //          return false;
 593       //        }
 594       //
 595       //        //need the unicode buffer
 596       //        if (withoutUnicodePtr == 0) {
 597       //          //buffer all the entries that have been left aside....
 598       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 599       //          System.arraycopy(
 600       //            source,
 601       //            startPosition,
 602       //            withoutUnicodeBuffer,
 603       //            1,
 604       //            withoutUnicodePtr);
 605       //        }
 606       //        //fill the buffer with the char
 607       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 608       //        return true;
 609       //      } //-------------end unicode traitement--------------
 610       //      else {
 611       if (!Character.isDigit(currentCharacter)) {
 612         currentPosition = temp;
 613         return false;
 614       }
 615       //        if (withoutUnicodePtr != 0)
 616       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 617       return true;
 618       //      }
 619     } catch (IndexOutOfBoundsException e) {
 620       currentPosition = temp;
 621       return false;
 622     }
 623   }
 624
 625   public final boolean getNextCharAsDigit(int radix) {
 626     //BOOLEAN
 627     //handle the case of unicode.
 628     //when a unicode appears then we must use a buffer that holds char
 629     // internal values
 630     //At the end of this method currentCharacter holds the new visited char
 631     //and currentPosition points right next after it
 632     //Both previous lines are true if the currentCharacter is a digit base on
 633     // radix
 634     //On false, no side effect has occured.
 635     //ALL getNextChar.... ARE OPTIMIZED COPIES
 636     int temp = currentPosition;
 637     try {
 638       currentCharacter = source[currentPosition++];
 639       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 640       //        && (source[currentPosition] == 'u')) {
 641       //        //-------------unicode traitement ------------
 642       //        int c1, c2, c3, c4;
 643       //        int unicodeSize = 6;
 644       //        currentPosition++;
 645       //        while (source[currentPosition] == 'u') {
 646       //          currentPosition++;
 647       //          unicodeSize++;
 648       //        }
 649       //
 650       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 651       //          || c1 < 0)
 652       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 653       //            || c2 < 0)
 654       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 655       //            || c3 < 0)
 656       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 657       //            || c4 < 0)) {
 658       //          currentPosition = temp;
 659       //          return false;
 660       //        }
 661       //
 662       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 663       //        if (Character.digit(currentCharacter, radix) == -1) {
 664       //          currentPosition = temp;
 665       //          return false;
 666       //        }
 667       //
 668       //        //need the unicode buffer
 669       //        if (withoutUnicodePtr == 0) {
 670       //          //buffer all the entries that have been left aside....
 671       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 672       //          System.arraycopy(
 673       //            source,
 674       //            startPosition,
 675       //            withoutUnicodeBuffer,
 676       //            1,
 677       //            withoutUnicodePtr);
 678       //        }
 679       //        //fill the buffer with the char
 680       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 681       //        return true;
 682       //      } //-------------end unicode traitement--------------
 683       //      else {
 684       if (Character.digit(currentCharacter, radix) == -1) {
 685         currentPosition = temp;
 686         return false;
 687       }
 688       //        if (withoutUnicodePtr != 0)
 689       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 690       return true;
 691       //      }
 692     } catch (IndexOutOfBoundsException e) {
 693       currentPosition = temp;
 694       return false;
 695     }
 696   }
 697
 698   public boolean getNextCharAsJavaIdentifierPart() {
 699     //BOOLEAN
 700     //handle the case of unicode.
 701     //when a unicode appears then we must use a buffer that holds char
 702     // internal values
 703     //At the end of this method currentCharacter holds the new visited char
 704     //and currentPosition points right next after it
 705     //Both previous lines are true if the currentCharacter is a
 706     // JavaIdentifierPart
 707     //On false, no side effect has occured.
 708     //ALL getNextChar.... ARE OPTIMIZED COPIES
 709     int temp = currentPosition;
 710     try {
 711       currentCharacter = source[currentPosition++];
 712       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 713       //        && (source[currentPosition] == 'u')) {
 714       //        //-------------unicode traitement ------------
 715       //        int c1, c2, c3, c4;
 716       //        int unicodeSize = 6;
 717       //        currentPosition++;
 718       //        while (source[currentPosition] == 'u') {
 719       //          currentPosition++;
 720       //          unicodeSize++;
 721       //        }
 722       //
 723       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 724       //          || c1 < 0)
 725       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 726       //            || c2 < 0)
 727       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 728       //            || c3 < 0)
 729       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 730       //            || c4 < 0)) {
 731       //          currentPosition = temp;
 732       //          return false;
 733       //        }
 734       //
 735       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 736       //        if (!isPHPIdentifierPart(currentCharacter)) {
 737       //          currentPosition = temp;
 738       //          return false;
 739       //        }
 740       //
 741       //        //need the unicode buffer
 742       //        if (withoutUnicodePtr == 0) {
 743       //          //buffer all the entries that have been left aside....
 744       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 745       //          System.arraycopy(
 746       //            source,
 747       //            startPosition,
 748       //            withoutUnicodeBuffer,
 749       //            1,
 750       //            withoutUnicodePtr);
 751       //        }
 752       //        //fill the buffer with the char
 753       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 754       //        return true;
 755       //      } //-------------end unicode traitement--------------
 756       //      else {
 757       if (!isPHPIdentifierPart(currentCharacter)) {
 758         currentPosition = temp;
 759         return false;
 760       }
 761       //        if (withoutUnicodePtr != 0)
 762       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 763       return true;
 764       //      }
 765     } catch (IndexOutOfBoundsException e) {
 766       currentPosition = temp;
 767       return false;
 768     }
 769   }
 770
 771   public int getCastOrParen() {
 772     int tempPosition = currentPosition;
 773     char tempCharacter = currentCharacter;
 774     int tempToken = TokenNameLPAREN;
 775     boolean found = false;
 776     StringBuffer buf = new StringBuffer();
 777     try {
 778       do {
 779         currentCharacter = source[currentPosition++];
 780       } while (currentCharacter == ' ' || currentCharacter == '\t');
 781       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 782         buf.append(currentCharacter);
 783         currentCharacter = source[currentPosition++];
 784       }
 785       if (buf.length() >= 3 && buf.length() <= 7) {
 786         char[] data = buf.toString().toCharArray();
 787         int index = 0;
 788         switch (data.length) {
 789         case 3:
 790           // int
 791           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 792             found = true;
 793             tempToken = TokenNameintCAST;
 794           }
 795           break;
 796         case 4:
 797           // bool real
 798           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 799             found = true;
 800             tempToken = TokenNameboolCAST;
 801           } else {
 802             index = 0;
 803             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 804               found = true;
 805               tempToken = TokenNamedoubleCAST;
 806             }
 807           }
 808           break;
 809         case 5:
 810           // array unset float
 811           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 812               && (data[++index] == 'y')) {
 813             found = true;
 814             tempToken = TokenNamearrayCAST;
 815           } else {
 816             index = 0;
 817             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 818                 && (data[++index] == 't')) {
 819               found = true;
 820               tempToken = TokenNameunsetCAST;
 821             } else {
 822               index = 0;
 823               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 824                   && (data[++index] == 't')) {
 825                 found = true;
 826                 tempToken = TokenNamedoubleCAST;
 827               }
 828             }
 829           }
 830           break;
 831         case 6:
 832           // object string double
 833           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 834               && (data[++index] == 'c') && (data[++index] == 't')) {
 835             found = true;
 836             tempToken = TokenNameobjectCAST;
 837           } else {
 838             index = 0;
 839             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 840                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 841               found = true;
 842               tempToken = TokenNamestringCAST;
 843             } else {
 844               index = 0;
 845               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 846                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 847                 found = true;
 848                 tempToken = TokenNamedoubleCAST;
 849               }
 850             }
 851           }
 852           break;
 853         case 7:
 854           // boolean integer
 855           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 856               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 857             found = true;
 858             tempToken = TokenNameboolCAST;
 859           } else {
 860             index = 0;
 861             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 862                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 863               found = true;
 864               tempToken = TokenNameintCAST;
 865             }
 866           }
 867           break;
 868         }
 869         if (found) {
 870           while (currentCharacter == ' ' || currentCharacter == '\t') {
 871             currentCharacter = source[currentPosition++];
 872           }
 873           if (currentCharacter == ')') {
 874             return tempToken;
 875           }
 876         }
 877       }
 878     } catch (IndexOutOfBoundsException e) {
 879     }
 880     currentCharacter = tempCharacter;
 881     currentPosition = tempPosition;
 882     return TokenNameLPAREN;
 883   }
 884
 885   public void consumeStringInterpolated() throws InvalidInputException {
 886     try {
 887       // consume next character
 888       unicodeAsBackSlash = false;
 889       currentCharacter = source[currentPosition++];
 890       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 891       //                  && (source[currentPosition] == 'u')) {
 892       //                  getNextUnicodeChar();
 893       //                } else {
 894       //                  if (withoutUnicodePtr != 0) {
 895       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 896       //                      currentCharacter;
 897       //                  }
 898       //                }
 899       while (currentCharacter != '`') {
 900         /** ** in PHP \r and \n are valid in string literals *** */
 901         //                if ((currentCharacter == '\n')
 902         //                  || (currentCharacter == '\r')) {
 903         //                  // relocate if finding another quote fairly close: thus unicode
 904         // '/u000D' will be fully consumed
 905         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 906         //                    if (currentPosition + lookAhead == source.length)
 907         //                      break;
 908         //                    if (source[currentPosition + lookAhead] == '\n')
 909         //                      break;
 910         //                    if (source[currentPosition + lookAhead] == '\"') {
 911         //                      currentPosition += lookAhead + 1;
 912         //                      break;
 913         //                    }
 914         //                  }
 915         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 916         //                }
 917         if (currentCharacter == '\\') {
 918           int escapeSize = currentPosition;
 919           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 920           //scanEscapeCharacter make a side effect on this value and we need
 921           // the previous value few lines down this one
 922           scanDoubleQuotedEscapeCharacter();
 923           escapeSize = currentPosition - escapeSize;
 924           if (withoutUnicodePtr == 0) {
 925             //buffer all the entries that have been left aside....
 926             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 927             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 928             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 929           } else { //overwrite the / in the buffer
 930             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 931             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 932               // where only one is correct
 933               withoutUnicodePtr--;
 934             }
 935           }
 936         }
 937         // consume next character
 938         unicodeAsBackSlash = false;
 939         currentCharacter = source[currentPosition++];
 940         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 941         //                    && (source[currentPosition] == 'u')) {
 942         //                    getNextUnicodeChar();
 943         //                  } else {
 944         if (withoutUnicodePtr != 0) {
 945           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 946         }
 947         //                  }
 948       }
 949     } catch (IndexOutOfBoundsException e) {
 950       //    reset end position for error reporting
 951       currentPosition -= 2;
 952       throw new InvalidInputException(UNTERMINATED_STRING);
 953     } catch (InvalidInputException e) {
 954       if (e.getMessage().equals(INVALID_ESCAPE)) {
 955         // relocate if finding another quote fairly close: thus unicode
 956         // '/u000D' will be fully consumed
 957         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 958           if (currentPosition + lookAhead == source.length)
 959             break;
 960           if (source[currentPosition + lookAhead] == '\n')
 961             break;
 962           if (source[currentPosition + lookAhead] == '`') {
 963             currentPosition += lookAhead + 1;
 964             break;
 965           }
 966         }
 967       }
 968       throw e; // rethrow
 969     }
 970     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 971       // //$NON-NLS-?$ where ? is an
 972       // int.
 973       if (currentLine == null) {
 974         currentLine = new NLSLine();
 975         lines.add(currentLine);
 976       }
 977       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 978     }
 979   }
 980
 981   public void consumeStringConstant() throws InvalidInputException {
 982     try {
 983       // consume next character
 984       unicodeAsBackSlash = false;
 985       currentCharacter = source[currentPosition++];
 986       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 987       //                  && (source[currentPosition] == 'u')) {
 988       //                  getNextUnicodeChar();
 989       //                } else {
 990       //                  if (withoutUnicodePtr != 0) {
 991       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 992       //                      currentCharacter;
 993       //                  }
 994       //                }
 995       while (currentCharacter != '\'') {
 996         /** ** in PHP \r and \n are valid in string literals *** */
 997         //                  if ((currentCharacter == '\n')
 998         //                    || (currentCharacter == '\r')) {
 999         //                    // relocate if finding another quote fairly close: thus unicode
1000         // '/u000D' will be fully consumed
1001         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1002         //                      if (currentPosition + lookAhead == source.length)
1003         //                        break;
1004         //                      if (source[currentPosition + lookAhead] == '\n')
1005         //                        break;
1006         //                      if (source[currentPosition + lookAhead] == '\"') {
1007         //                        currentPosition += lookAhead + 1;
1008         //                        break;
1009         //                      }
1010         //                    }
1011         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1012         //                  }
1013         if (currentCharacter == '\\') {
1014           int escapeSize = currentPosition;
1015           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1016           //scanEscapeCharacter make a side effect on this value and we need
1017           // the previous value few lines down this one
1018           scanSingleQuotedEscapeCharacter();
1019           escapeSize = currentPosition - escapeSize;
1020           if (withoutUnicodePtr == 0) {
1021             //buffer all the entries that have been left aside....
1022             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1023             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1024             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1025           } else { //overwrite the / in the buffer
1026             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1027             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1028               // where only one is correct
1029               withoutUnicodePtr--;
1030             }
1031           }
1032         }
1033         // consume next character
1034         unicodeAsBackSlash = false;
1035         currentCharacter = source[currentPosition++];
1036         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1037         //                    && (source[currentPosition] == 'u')) {
1038         //                    getNextUnicodeChar();
1039         //                  } else {
1040         if (withoutUnicodePtr != 0) {
1041           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1042         }
1043         //                  }
1044       }
1045     } catch (IndexOutOfBoundsException e) {
1046       // reset end position for error reporting
1047       currentPosition -= 2;
1048       throw new InvalidInputException(UNTERMINATED_STRING);
1049     } catch (InvalidInputException e) {
1050       if (e.getMessage().equals(INVALID_ESCAPE)) {
1051         // relocate if finding another quote fairly close: thus unicode
1052         // '/u000D' will be fully consumed
1053         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1054           if (currentPosition + lookAhead == source.length)
1055             break;
1056           if (source[currentPosition + lookAhead] == '\n')
1057             break;
1058           if (source[currentPosition + lookAhead] == '\'') {
1059             currentPosition += lookAhead + 1;
1060             break;
1061           }
1062         }
1063       }
1064       throw e; // rethrow
1065     }
1066     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1067       // //$NON-NLS-?$ where ? is an
1068       // int.
1069       if (currentLine == null) {
1070         currentLine = new NLSLine();
1071         lines.add(currentLine);
1072       }
1073       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1074     }
1075   }
1076
1077   public void consumeStringLiteral() throws InvalidInputException {
1078     try {
1079       // consume next character
1080       unicodeAsBackSlash = false;
1081       currentCharacter = source[currentPosition++];
1082       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1083       //                  && (source[currentPosition] == 'u')) {
1084       //                  getNextUnicodeChar();
1085       //                } else {
1086       //                  if (withoutUnicodePtr != 0) {
1087       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1088       //                      currentCharacter;
1089       //                  }
1090       //                }
1091       while (currentCharacter != '"') {
1092         /** ** in PHP \r and \n are valid in string literals *** */
1093         //                  if ((currentCharacter == '\n')
1094         //                    || (currentCharacter == '\r')) {
1095         //                    // relocate if finding another quote fairly close: thus unicode
1096         // '/u000D' will be fully consumed
1097         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1098         //                      if (currentPosition + lookAhead == source.length)
1099         //                        break;
1100         //                      if (source[currentPosition + lookAhead] == '\n')
1101         //                        break;
1102         //                      if (source[currentPosition + lookAhead] == '\"') {
1103         //                        currentPosition += lookAhead + 1;
1104         //                        break;
1105         //                      }
1106         //                    }
1107         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1108         //                  }
1109         if (currentCharacter == '\\') {
1110           int escapeSize = currentPosition;
1111           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1112           //scanEscapeCharacter make a side effect on this value and we need
1113           // the previous value few lines down this one
1114           scanDoubleQuotedEscapeCharacter();
1115           escapeSize = currentPosition - escapeSize;
1116           if (withoutUnicodePtr == 0) {
1117             //buffer all the entries that have been left aside....
1118             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1119             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1120             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1121           } else { //overwrite the / in the buffer
1122             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1123             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1124               // where only one is correct
1125               withoutUnicodePtr--;
1126             }
1127           }
1128         }
1129         // consume next character
1130         unicodeAsBackSlash = false;
1131         currentCharacter = source[currentPosition++];
1132         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1133         //                    && (source[currentPosition] == 'u')) {
1134         //                    getNextUnicodeChar();
1135         //                  } else {
1136         if (withoutUnicodePtr != 0) {
1137           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1138         }
1139         //                  }
1140       }
1141     } catch (IndexOutOfBoundsException e) {
1142       //    reset end position for error reporting
1143       currentPosition -= 2;
1144       throw new InvalidInputException(UNTERMINATED_STRING);
1145     } catch (InvalidInputException e) {
1146       if (e.getMessage().equals(INVALID_ESCAPE)) {
1147         // relocate if finding another quote fairly close: thus unicode
1148         // '/u000D' will be fully consumed
1149         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1150           if (currentPosition + lookAhead == source.length)
1151             break;
1152           if (source[currentPosition + lookAhead] == '\n')
1153             break;
1154           if (source[currentPosition + lookAhead] == '\"') {
1155             currentPosition += lookAhead + 1;
1156             break;
1157           }
1158         }
1159       }
1160       throw e; // rethrow
1161     }
1162     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1163       // //$NON-NLS-?$ where ? is an
1164       // int.
1165       if (currentLine == null) {
1166         currentLine = new NLSLine();
1167         lines.add(currentLine);
1168       }
1169       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1170     }
1171   }
1172
1173   public int getNextToken() throws InvalidInputException {
1174     if (!phpMode) {
1175       return getInlinedHTML(currentPosition);
1176     }
1177     if (phpMode) {
1178       this.wasAcr = false;
1179       if (diet) {
1180         jumpOverMethodBody();
1181         diet = false;
1182         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1183       }
1184       try {
1185         while (true) {
1186           withoutUnicodePtr = 0;
1187           //start with a new token
1188           char encapsedChar = ' ';
1189           if (!encapsedStringStack.isEmpty()) {
1190             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1191           }
1192           if (encapsedChar != '$' && encapsedChar != ' ') {
1193             currentCharacter = source[currentPosition++];
1194             if (currentCharacter == encapsedChar) {
1195               switch (currentCharacter) {
1196               case '`':
1197                 return TokenNameEncapsedString0;
1198               case '\'':
1199                 return TokenNameEncapsedString1;
1200               case '"':
1201                 return TokenNameEncapsedString2;
1202               }
1203             }
1204             while (currentCharacter != encapsedChar) {
1205               /** ** in PHP \r and \n are valid in string literals *** */
1206               switch (currentCharacter) {
1207               case '\\':
1208                 int escapeSize = currentPosition;
1209                 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1210                 //scanEscapeCharacter make a side effect on this value and
1211                 // we need the previous value few lines down this one
1212                 scanDoubleQuotedEscapeCharacter();
1213                 escapeSize = currentPosition - escapeSize;
1214                 if (withoutUnicodePtr == 0) {
1215                   //buffer all the entries that have been left aside....
1216                   withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1217                   System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1218                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1219                 } else { //overwrite the / in the buffer
1220                   withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1221                   if (backSlashAsUnicodeInString) { //there are TWO \ in
1222                     withoutUnicodePtr--;
1223                   }
1224                 }
1225                 break;
1226               case '$':
1227                 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1228                   currentPosition--;
1229                   encapsedStringStack.push(new Character('$'));
1230                   return TokenNameSTRING;
1231                 }
1232                 break;
1233               case '{':
1234                 if (source[currentPosition] == '$') { // CURLY_OPEN
1235                   currentPosition--;
1236                   encapsedStringStack.push(new Character('$'));
1237                   return TokenNameSTRING;
1238                 }
1239               }
1240               // consume next character
1241               unicodeAsBackSlash = false;
1242               currentCharacter = source[currentPosition++];
1243               if (withoutUnicodePtr != 0) {
1244                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1245               }
1246               //                  }
1247             } // end while
1248             currentPosition--;
1249             return TokenNameSTRING;
1250           }
1251           // ---------Consume white space and handles startPosition---------
1252           int whiteStart = currentPosition;
1253           startPosition = currentPosition;
1254           currentCharacter = source[currentPosition++];
1255           if (encapsedChar == '$') {
1256             switch (currentCharacter) {
1257             case '\\':
1258               currentCharacter = source[currentPosition++];
1259               return TokenNameSTRING;
1260             case '{':
1261               if (encapsedChar == '$') {
1262                 if (getNextChar('$'))
1263                   return TokenNameLBRACE_DOLLAR;
1264               }
1265               return TokenNameLBRACE;
1266             case '}':
1267               return TokenNameRBRACE;
1268             case '[':
1269               return TokenNameLBRACKET;
1270             case ']':
1271               return TokenNameRBRACKET;
1272             case '\'':
1273               if (tokenizeStrings) {
1274                 consumeStringConstant();
1275                 return TokenNameStringSingleQuote;
1276               }
1277               return TokenNameEncapsedString1;
1278             case '"':
1279               return TokenNameEncapsedString2;
1280             case '`':
1281               if (tokenizeStrings) {
1282                 consumeStringInterpolated();
1283                 return TokenNameStringInterpolated;
1284               }
1285               return TokenNameEncapsedString0;
1286             case '-':
1287               if (getNextChar('>'))
1288                 return TokenNameMINUS_GREATER;
1289               return TokenNameSTRING;
1290             default:
1291               if (currentCharacter == '$') {
1292                 int oldPosition = currentPosition;
1293                 try {
1294                   currentCharacter = source[currentPosition++];
1295                   if (currentCharacter == '{') {
1296                     return TokenNameDOLLAR_LBRACE;
1297                   }
1298                   if (isPHPIdentifierStart(currentCharacter)) {
1299                     return scanIdentifierOrKeyword(true);
1300                   } else {
1301                     currentPosition = oldPosition;
1302                     return TokenNameSTRING;
1303                   }
1304                 } catch (IndexOutOfBoundsException e) {
1305                   currentPosition = oldPosition;
1306                   return TokenNameSTRING;
1307                 }
1308               }
1309               if (isPHPIdentifierStart(currentCharacter))
1310                 return scanIdentifierOrKeyword(false);
1311               if (Character.isDigit(currentCharacter))
1312                 return scanNumber(false);
1313               return TokenNameERROR;
1314             }
1315           }
1316           //          boolean isWhiteSpace;
1317
1318           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1319             startPosition = currentPosition;
1320             currentCharacter = source[currentPosition++];
1321             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1322             //              && (source[currentPosition] == 'u')) {
1323             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1324             //            } else {
1325             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1326               checkNonExternalizeString();
1327               if (recordLineSeparator) {
1328                 pushLineSeparator();
1329               } else {
1330                 currentLine = null;
1331               }
1332             }
1333             //            isWhiteSpace = (currentCharacter == ' ')
1334             //                || Character.isWhitespace(currentCharacter);
1335             //            }
1336           }
1337           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1338             // reposition scanner in case we are interested by spaces as tokens
1339             currentPosition--;
1340             startPosition = whiteStart;
1341             return TokenNameWHITESPACE;
1342           }
1343           //little trick to get out in the middle of a source compuation
1344           if (currentPosition > eofPosition)
1345             return TokenNameEOF;
1346           // ---------Identify the next token-------------
1347           switch (currentCharacter) {
1348           case '(':
1349             return getCastOrParen();
1350           case ')':
1351             return TokenNameRPAREN;
1352           case '{':
1353             return TokenNameLBRACE;
1354           case '}':
1355             return TokenNameRBRACE;
1356           case '[':
1357             return TokenNameLBRACKET;
1358           case ']':
1359             return TokenNameRBRACKET;
1360           case ';':
1361             return TokenNameSEMICOLON;
1362           case ',':
1363             return TokenNameCOMMA;
1364           case '.':
1365             if (getNextChar('='))
1366               return TokenNameDOT_EQUAL;
1367             if (getNextCharAsDigit())
1368               return scanNumber(true);
1369             return TokenNameDOT;
1370           case '+': {
1371             int test;
1372             if ((test = getNextChar('+', '=')) == 0)
1373               return TokenNamePLUS_PLUS;
1374             if (test > 0)
1375               return TokenNamePLUS_EQUAL;
1376             return TokenNamePLUS;
1377           }
1378           case '-': {
1379             int test;
1380             if ((test = getNextChar('-', '=')) == 0)
1381               return TokenNameMINUS_MINUS;
1382             if (test > 0)
1383               return TokenNameMINUS_EQUAL;
1384             if (getNextChar('>'))
1385               return TokenNameMINUS_GREATER;
1386             return TokenNameMINUS;
1387           }
1388           case '~':
1389             if (getNextChar('='))
1390               return TokenNameTWIDDLE_EQUAL;
1391             return TokenNameTWIDDLE;
1392           case '!':
1393             if (getNextChar('=')) {
1394               if (getNextChar('=')) {
1395                 return TokenNameNOT_EQUAL_EQUAL;
1396               }
1397               return TokenNameNOT_EQUAL;
1398             }
1399             return TokenNameNOT;
1400           case '*':
1401             if (getNextChar('='))
1402               return TokenNameMULTIPLY_EQUAL;
1403             return TokenNameMULTIPLY;
1404           case '%':
1405             if (getNextChar('='))
1406               return TokenNameREMAINDER_EQUAL;
1407             return TokenNameREMAINDER;
1408           case '<': {
1409             int oldPosition = currentPosition;
1410             try {
1411               currentCharacter = source[currentPosition++];
1412             } catch (IndexOutOfBoundsException e) {
1413               currentPosition = oldPosition;
1414               return TokenNameLESS;
1415             }
1416             switch (currentCharacter) {
1417             case '=':
1418               return TokenNameLESS_EQUAL;
1419             case '>':
1420               return TokenNameNOT_EQUAL;
1421             case '<':
1422               if (getNextChar('='))
1423                 return TokenNameLEFT_SHIFT_EQUAL;
1424               if (getNextChar('<')) {
1425                 currentCharacter = source[currentPosition++];
1426                 while (Character.isWhitespace(currentCharacter)) {
1427                   currentCharacter = source[currentPosition++];
1428                 }
1429                 int heredocStart = currentPosition - 1;
1430                 int heredocLength = 0;
1431                 if (isPHPIdentifierStart(currentCharacter)) {
1432                   currentCharacter = source[currentPosition++];
1433                 } else {
1434                   return TokenNameERROR;
1435                 }
1436                 while (isPHPIdentifierPart(currentCharacter)) {
1437                   currentCharacter = source[currentPosition++];
1438                 }
1439                 heredocLength = currentPosition - heredocStart - 1;
1440                 // heredoc end-tag determination
1441                 boolean endTag = true;
1442                 char ch;
1443                 do {
1444                   ch = source[currentPosition++];
1445                   if (ch == '\r' || ch == '\n') {
1446                     if (recordLineSeparator) {
1447                       pushLineSeparator();
1448                     } else {
1449                       currentLine = null;
1450                     }
1451                     for (int i = 0; i < heredocLength; i++) {
1452                       if (source[currentPosition + i] != source[heredocStart + i]) {
1453                         endTag = false;
1454                         break;
1455                       }
1456                     }
1457                     if (endTag) {
1458                       currentPosition += heredocLength - 1;
1459                       currentCharacter = source[currentPosition++];
1460                       break; // do...while loop
1461                     } else {
1462                       endTag = true;
1463                     }
1464                   }
1465                 } while (true);
1466                 return TokenNameHEREDOC;
1467               }
1468               return TokenNameLEFT_SHIFT;
1469             }
1470             currentPosition = oldPosition;
1471             return TokenNameLESS;
1472           }
1473           case '>': {
1474             int test;
1475             if ((test = getNextChar('=', '>')) == 0)
1476               return TokenNameGREATER_EQUAL;
1477             if (test > 0) {
1478               if ((test = getNextChar('=', '>')) == 0)
1479                 return TokenNameRIGHT_SHIFT_EQUAL;
1480               return TokenNameRIGHT_SHIFT;
1481             }
1482             return TokenNameGREATER;
1483           }
1484           case '=':
1485             if (getNextChar('=')) {
1486               if (getNextChar('=')) {
1487                 return TokenNameEQUAL_EQUAL_EQUAL;
1488               }
1489               return TokenNameEQUAL_EQUAL;
1490             }
1491             if (getNextChar('>'))
1492               return TokenNameEQUAL_GREATER;
1493             return TokenNameEQUAL;
1494           case '&': {
1495             int test;
1496             if ((test = getNextChar('&', '=')) == 0)
1497               return TokenNameAND_AND;
1498             if (test > 0)
1499               return TokenNameAND_EQUAL;
1500             return TokenNameAND;
1501           }
1502           case '|': {
1503             int test;
1504             if ((test = getNextChar('|', '=')) == 0)
1505               return TokenNameOR_OR;
1506             if (test > 0)
1507               return TokenNameOR_EQUAL;
1508             return TokenNameOR;
1509           }
1510           case '^':
1511             if (getNextChar('='))
1512               return TokenNameXOR_EQUAL;
1513             return TokenNameXOR;
1514           case '?':
1515             if (getNextChar('>')) {
1516               phpMode = false;
1517               if (currentPosition == source.length) {
1518                 phpMode = true;
1519                 return TokenNameINLINE_HTML;
1520               }
1521               return getInlinedHTML(currentPosition - 2);
1522             }
1523             return TokenNameQUESTION;
1524           case ':':
1525             if (getNextChar(':'))
1526               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1527             return TokenNameCOLON;
1528           case '@':
1529             return TokenNameAT;
1530           case '\'':
1531             consumeStringConstant();
1532             return TokenNameStringSingleQuote;
1533           case '"':
1534             if (tokenizeStrings) {
1535               consumeStringLiteral();
1536               return TokenNameStringDoubleQuote;
1537             }
1538             return TokenNameEncapsedString2;
1539           case '`':
1540             if (tokenizeStrings) {
1541               consumeStringInterpolated();
1542               return TokenNameStringInterpolated;
1543             }
1544             return TokenNameEncapsedString0;
1545           case '#':
1546           case '/': {
1547             char startChar = currentCharacter;
1548             if (getNextChar('=') && startChar == '/') {
1549               return TokenNameDIVIDE_EQUAL;
1550             }
1551             int test;
1552             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1553               //line comment
1554               this.lastCommentLinePosition = this.currentPosition;
1555               int endPositionForLineComment = 0;
1556               try { //get the next char
1557                 currentCharacter = source[currentPosition++];
1558                 //                    if (((currentCharacter = source[currentPosition++])
1559                 //                      == '\\')
1560                 //                      && (source[currentPosition] == 'u')) {
1561                 //                      //-------------unicode traitement ------------
1562                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1563                 //                      currentPosition++;
1564                 //                      while (source[currentPosition] == 'u') {
1565                 //                        currentPosition++;
1566                 //                      }
1567                 //                      if ((c1 =
1568                 //                        Character.getNumericValue(source[currentPosition++]))
1569                 //                        > 15
1570                 //                        || c1 < 0
1571                 //                        || (c2 =
1572                 //                          Character.getNumericValue(source[currentPosition++]))
1573                 //                          > 15
1574                 //                        || c2 < 0
1575                 //                        || (c3 =
1576                 //                          Character.getNumericValue(source[currentPosition++]))
1577                 //                          > 15
1578                 //                        || c3 < 0
1579                 //                        || (c4 =
1580                 //                          Character.getNumericValue(source[currentPosition++]))
1581                 //                          > 15
1582                 //                        || c4 < 0) {
1583                 //                        throw new
1584                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1585                 //                      } else {
1586                 //                        currentCharacter =
1587                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1588                 //                      }
1589                 //                    }
1590                 //handle the \\u case manually into comment
1591                 //                    if (currentCharacter == '\\') {
1592                 //                      if (source[currentPosition] == '\\')
1593                 //                        currentPosition++;
1594                 //                    } //jump over the \\
1595                 boolean isUnicode = false;
1596                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1597                   this.lastCommentLinePosition = this.currentPosition;
1598                   if (currentCharacter == '?') {
1599                     if (getNextChar('>')) {
1600                       startPosition = currentPosition - 2;
1601                       phpMode = false;
1602                       return TokenNameINLINE_HTML;
1603                     }
1604                   }
1605                   //get the next char
1606                   isUnicode = false;
1607                   currentCharacter = source[currentPosition++];
1608                   //                      if (((currentCharacter = source[currentPosition++])
1609                   //                        == '\\')
1610                   //                        && (source[currentPosition] == 'u')) {
1611                   //                        isUnicode = true;
1612                   //                        //-------------unicode traitement ------------
1613                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1614                   //                        currentPosition++;
1615                   //                        while (source[currentPosition] == 'u') {
1616                   //                          currentPosition++;
1617                   //                        }
1618                   //                        if ((c1 =
1619                   //                          Character.getNumericValue(source[currentPosition++]))
1620                   //                          > 15
1621                   //                          || c1 < 0
1622                   //                          || (c2 =
1623                   //                            Character.getNumericValue(
1624                   //                              source[currentPosition++]))
1625                   //                            > 15
1626                   //                          || c2 < 0
1627                   //                          || (c3 =
1628                   //                            Character.getNumericValue(
1629                   //                              source[currentPosition++]))
1630                   //                            > 15
1631                   //                          || c3 < 0
1632                   //                          || (c4 =
1633                   //                            Character.getNumericValue(
1634                   //                              source[currentPosition++]))
1635                   //                            > 15
1636                   //                          || c4 < 0) {
1637                   //                          throw new
1638                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1639                   //                        } else {
1640                   //                          currentCharacter =
1641                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1642                   //                        }
1643                   //                      }
1644                   //handle the \\u case manually into comment
1645                   //                      if (currentCharacter == '\\') {
1646                   //                        if (source[currentPosition] == '\\')
1647                   //                          currentPosition++;
1648                   //                      } //jump over the \\
1649                 }
1650                 if (isUnicode) {
1651                   endPositionForLineComment = currentPosition - 6;
1652                 } else {
1653                   endPositionForLineComment = currentPosition - 1;
1654                 }
1655                 //                    recordComment(false);
1656                 recordComment(TokenNameCOMMENT_LINE);
1657                 if (this.taskTags != null)
1658                   checkTaskTag(this.startPosition, this.currentPosition);
1659                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1660                   checkNonExternalizeString();
1661                   if (recordLineSeparator) {
1662                     if (isUnicode) {
1663                       pushUnicodeLineSeparator();
1664                     } else {
1665                       pushLineSeparator();
1666                     }
1667                   } else {
1668                     currentLine = null;
1669                   }
1670                 }
1671                 if (tokenizeComments) {
1672                   if (!isUnicode) {
1673                     currentPosition = endPositionForLineComment;
1674                     // reset one character behind
1675                   }
1676                   return TokenNameCOMMENT_LINE;
1677                 }
1678               } catch (IndexOutOfBoundsException e) { //an eof will them
1679                 // be generated
1680                 if (tokenizeComments) {
1681                   currentPosition--;
1682                   // reset one character behind
1683                   return TokenNameCOMMENT_LINE;
1684                 }
1685               }
1686               break;
1687             }
1688             if (test > 0) {
1689               //traditional and annotation comment
1690               boolean isJavadoc = false, star = false;
1691               // consume next character
1692               unicodeAsBackSlash = false;
1693               currentCharacter = source[currentPosition++];
1694               //                  if (((currentCharacter = source[currentPosition++]) ==
1695               // '\\')
1696               //                    && (source[currentPosition] == 'u')) {
1697               //                    getNextUnicodeChar();
1698               //                  } else {
1699               //                    if (withoutUnicodePtr != 0) {
1700               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1701               //                        currentCharacter;
1702               //                    }
1703               //                  }
1704               if (currentCharacter == '*') {
1705                 isJavadoc = true;
1706                 star = true;
1707               }
1708               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1709                 checkNonExternalizeString();
1710                 if (recordLineSeparator) {
1711                   pushLineSeparator();
1712                 } else {
1713                   currentLine = null;
1714                 }
1715               }
1716               try { //get the next char
1717                 currentCharacter = source[currentPosition++];
1718                 //                    if (((currentCharacter = source[currentPosition++])
1719                 //                      == '\\')
1720                 //                      && (source[currentPosition] == 'u')) {
1721                 //                      //-------------unicode traitement ------------
1722                 //                      getNextUnicodeChar();
1723                 //                    }
1724                 //handle the \\u case manually into comment
1725                 //                    if (currentCharacter == '\\') {
1726                 //                      if (source[currentPosition] == '\\')
1727                 //                        currentPosition++;
1728                 //                      //jump over the \\
1729                 //                    }
1730                 // empty comment is not a javadoc /**/
1731                 if (currentCharacter == '/') {
1732                   isJavadoc = false;
1733                 }
1734                 //loop until end of comment */
1735                 while ((currentCharacter != '/') || (!star)) {
1736                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1737                     checkNonExternalizeString();
1738                     if (recordLineSeparator) {
1739                       pushLineSeparator();
1740                     } else {
1741                       currentLine = null;
1742                     }
1743                   }
1744                   star = currentCharacter == '*';
1745                   //get next char
1746                   currentCharacter = source[currentPosition++];
1747                   //                      if (((currentCharacter = source[currentPosition++])
1748                   //                        == '\\')
1749                   //                        && (source[currentPosition] == 'u')) {
1750                   //                        //-------------unicode traitement ------------
1751                   //                        getNextUnicodeChar();
1752                   //                      }
1753                   //handle the \\u case manually into comment
1754                   //                      if (currentCharacter == '\\') {
1755                   //                        if (source[currentPosition] == '\\')
1756                   //                          currentPosition++;
1757                   //                      } //jump over the \\
1758                 }
1759                 //recordComment(isJavadoc);
1760                 if (isJavadoc) {
1761                   recordComment(TokenNameCOMMENT_PHPDOC);
1762                 } else {
1763                   recordComment(TokenNameCOMMENT_BLOCK);
1764                 }
1765
1766                 if (tokenizeComments) {
1767                   if (isJavadoc)
1768                     return TokenNameCOMMENT_PHPDOC;
1769                   return TokenNameCOMMENT_BLOCK;
1770                 }
1771
1772                 if (this.taskTags != null) {
1773                   checkTaskTag(this.startPosition, this.currentPosition);
1774                 }
1775               } catch (IndexOutOfBoundsException e) {
1776                 //                  reset end position for error reporting
1777                 currentPosition -= 2;
1778                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1779               }
1780               break;
1781             }
1782             return TokenNameDIVIDE;
1783           }
1784           case '\u001a':
1785             if (atEnd())
1786               return TokenNameEOF;
1787             //the atEnd may not be <currentPosition == source.length> if
1788             // source is only some part of a real (external) stream
1789             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1790           default:
1791             if (currentCharacter == '$') {
1792               int oldPosition = currentPosition;
1793               try {
1794                 currentCharacter = source[currentPosition++];
1795                 if (isPHPIdentifierStart(currentCharacter)) {
1796                   return scanIdentifierOrKeyword(true);
1797                 } else {
1798                   currentPosition = oldPosition;
1799                   return TokenNameDOLLAR;
1800                 }
1801               } catch (IndexOutOfBoundsException e) {
1802                 currentPosition = oldPosition;
1803                 return TokenNameDOLLAR;
1804               }
1805             }
1806             if (isPHPIdentifierStart(currentCharacter))
1807               return scanIdentifierOrKeyword(false);
1808             if (Character.isDigit(currentCharacter))
1809               return scanNumber(false);
1810             return TokenNameERROR;
1811           }
1812         }
1813       } //-----------------end switch while try--------------------
1814       catch (IndexOutOfBoundsException e) {
1815       }
1816     }
1817     return TokenNameEOF;
1818   }
1819
1820   private int getInlinedHTML(int start) throws InvalidInputException {
1821     int token = getInlinedHTMLToken(start);
1822     if (token == TokenNameINLINE_HTML) {
1823       //                Stack stack = new Stack();
1824       //                // scan html for errors
1825       //                Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1826       //                int lastPHPEndPos=0;
1827       //                for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1828       //                    Tag tag=(Tag)i.next();
1829       //
1830       //                    if (tag instanceof StartTag) {
1831       //                        StartTag startTag=(StartTag)tag;
1832       //                      // System.out.println("startTag: "+tag);
1833       //                        if (startTag.isServerTag()) {
1834       //                          // TODO : what to do with a server tag ?
1835       //                        } else {
1836       //                            // do whatever with HTML start tag
1837       //                            // use startTag.getElement() to find the element corresponding
1838       //                            // to this start tag which may be useful if you implement code
1839       //                            // folding etc
1840       //                                stack.push(startTag);
1841       //                        }
1842       //                    } else {
1843       //                        EndTag endTag=(EndTag)tag;
1844       //                        StartTag stag = (StartTag) stack.peek();
1845       //// System.out.println("endTag: "+tag);
1846       //                        // do whatever with HTML end tag.
1847       //                    }
1848       //                }
1849     }
1850     return token;
1851   }
1852
1853   /**
1854    * @return
1855    * @throws InvalidInputException
1856    */
1857   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1858     if (currentPosition > source.length) {
1859       currentPosition = source.length;
1860       return TokenNameEOF;
1861     }
1862     startPosition = start;
1863     try {
1864       while (!phpMode) {
1865         currentCharacter = source[currentPosition++];
1866         if (currentCharacter == '<') {
1867           if (getNextChar('?')) {
1868             currentCharacter = source[currentPosition++];
1869             if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1870               currentPosition--;
1871               // (currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1872               // <?
1873               if (ignorePHPOneLiner) {
1874                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1875                   phpMode = true;
1876                   return TokenNameINLINE_HTML;
1877                 }
1878               } else {
1879                 phpMode = true;
1880                 return TokenNameINLINE_HTML;
1881               }
1882             } else {
1883               //              boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1884               //              if (phpStart) {
1885               int test = getNextChar('H', 'h');
1886               if (test >= 0) {
1887                 test = getNextChar('P', 'p');
1888                 if (test >= 0) {
1889                   // <?PHP <?php
1890                   if (ignorePHPOneLiner) {
1891                     if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1892                       phpMode = true;
1893                       return TokenNameINLINE_HTML;
1894                     }
1895                   } else {
1896                     phpMode = true;
1897                     return TokenNameINLINE_HTML;
1898                   }
1899                 }
1900               }
1901               //              }
1902             }
1903           }
1904         }
1905         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1906           if (recordLineSeparator) {
1907             pushLineSeparator();
1908           } else {
1909             currentLine = null;
1910           }
1911         }
1912       } //-----------------while--------------------
1913       phpMode = true;
1914       return TokenNameINLINE_HTML;
1915     } //-----------------try--------------------
1916     catch (IndexOutOfBoundsException e) {
1917       startPosition = start;
1918       currentPosition--;
1919     }
1920     phpMode = true;
1921     return TokenNameINLINE_HTML;
1922   }
1923
1924   /**
1925    * @return
1926    */
1927   private int lookAheadLinePHPTag() {
1928     // check if the PHP is only in this line (for CodeFormatter)
1929     int currentPositionInLine = currentPosition;
1930     char previousCharInLine = ' ';
1931     char currentCharInLine = ' ';
1932     boolean singleQuotedStringActive = false;
1933     boolean doubleQuotedStringActive = false;
1934
1935     try {
1936       // look ahead in this line
1937       while (true) {
1938         previousCharInLine = currentCharInLine;
1939         currentCharInLine = source[currentPositionInLine++];
1940         switch (currentCharInLine) {
1941         case '>':
1942           if (previousCharInLine == '?') {
1943             // update the scanner's current Position in the source
1944             currentPosition = currentPositionInLine;
1945             // use as "dummy" token
1946             return TokenNameEOF;
1947           }
1948           break;
1949         case '\"':
1950           if (doubleQuotedStringActive) {
1951             if (previousCharInLine != '\\') {
1952               doubleQuotedStringActive = false;
1953             }
1954           } else {
1955             if (!singleQuotedStringActive) {
1956               doubleQuotedStringActive = true;
1957             }
1958           }
1959           break;
1960         case '\'':
1961           if (singleQuotedStringActive) {
1962             if (previousCharInLine != '\\') {
1963               singleQuotedStringActive = false;
1964             }
1965           } else {
1966             if (!doubleQuotedStringActive) {
1967               singleQuotedStringActive = true;
1968             }
1969           }
1970           break;
1971         case '\n':
1972           phpMode = true;
1973           return TokenNameINLINE_HTML;
1974         case '#':
1975           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1976             phpMode = true;
1977             return TokenNameINLINE_HTML;
1978           }
1979           break;
1980         case '/':
1981           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1982             phpMode = true;
1983             return TokenNameINLINE_HTML;
1984           }
1985           break;
1986         case '*':
1987           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1988             phpMode = true;
1989             return TokenNameINLINE_HTML;
1990           }
1991           break;
1992         }
1993       }
1994     } catch (IndexOutOfBoundsException e) {
1995       phpMode = true;
1996       currentPosition = currentPositionInLine;
1997       return TokenNameINLINE_HTML;
1998     }
1999   }
2000
2001   //  public final void getNextUnicodeChar()
2002   //    throws IndexOutOfBoundsException, InvalidInputException {
2003   //    //VOID
2004   //    //handle the case of unicode.
2005   //    //when a unicode appears then we must use a buffer that holds char
2006   // internal values
2007   //    //At the end of this method currentCharacter holds the new visited char
2008   //    //and currentPosition points right next after it
2009   //
2010   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
2011   //
2012   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2013   //    currentPosition++;
2014   //    while (source[currentPosition] == 'u') {
2015   //      currentPosition++;
2016   //      unicodeSize++;
2017   //    }
2018   //
2019   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2020   //      || c1 < 0
2021   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2022   //      || c2 < 0
2023   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2024   //      || c3 < 0
2025   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2026   //      || c4 < 0) {
2027   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2028   //    } else {
2029   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2030   //      //need the unicode buffer
2031   //      if (withoutUnicodePtr == 0) {
2032   //        //buffer all the entries that have been left aside....
2033   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2034   //        System.arraycopy(
2035   //          source,
2036   //          startPosition,
2037   //          withoutUnicodeBuffer,
2038   //          1,
2039   //          withoutUnicodePtr);
2040   //      }
2041   //      //fill the buffer with the char
2042   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2043   //    }
2044   //    unicodeAsBackSlash = currentCharacter == '\\';
2045   //  }
2046   /*
2047    * Tokenize a method body, assuming that curly brackets are properly balanced.
2048    */
2049   public final void jumpOverMethodBody() {
2050     this.wasAcr = false;
2051     int found = 1;
2052     try {
2053       while (true) { //loop for jumping over comments
2054         // ---------Consume white space and handles startPosition---------
2055         boolean isWhiteSpace;
2056         do {
2057           startPosition = currentPosition;
2058           currentCharacter = source[currentPosition++];
2059           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2060           //            && (source[currentPosition] == 'u')) {
2061           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2062           //          } else {
2063           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2064             pushLineSeparator();
2065           isWhiteSpace = Character.isWhitespace(currentCharacter);
2066           //          }
2067         } while (isWhiteSpace);
2068         // -------consume token until } is found---------
2069         switch (currentCharacter) {
2070         case '{':
2071           found++;
2072           break;
2073         case '}':
2074           found--;
2075           if (found == 0)
2076             return;
2077           break;
2078         case '\'': {
2079           boolean test;
2080           test = getNextChar('\\');
2081           if (test) {
2082             try {
2083               scanDoubleQuotedEscapeCharacter();
2084             } catch (InvalidInputException ex) {
2085             }
2086             ;
2087           } else {
2088             //                try { // consume next character
2089             unicodeAsBackSlash = false;
2090             currentCharacter = source[currentPosition++];
2091             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2092             //                    && (source[currentPosition] == 'u')) {
2093             //                    getNextUnicodeChar();
2094             //                  } else {
2095             if (withoutUnicodePtr != 0) {
2096               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2097             }
2098             //                  }
2099             //                } catch (InvalidInputException ex) {
2100             //                };
2101           }
2102           getNextChar('\'');
2103           break;
2104         }
2105         case '"':
2106           try {
2107             //              try { // consume next character
2108             unicodeAsBackSlash = false;
2109             currentCharacter = source[currentPosition++];
2110             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2111             //                  && (source[currentPosition] == 'u')) {
2112             //                  getNextUnicodeChar();
2113             //                } else {
2114             if (withoutUnicodePtr != 0) {
2115               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2116             }
2117             //                }
2118             //              } catch (InvalidInputException ex) {
2119             //              };
2120             while (currentCharacter != '"') {
2121               if (currentCharacter == '\r') {
2122                 if (source[currentPosition] == '\n')
2123                   currentPosition++;
2124                 break;
2125                 // the string cannot go further that the line
2126               }
2127               if (currentCharacter == '\n') {
2128                 break;
2129                 // the string cannot go further that the line
2130               }
2131               if (currentCharacter == '\\') {
2132                 try {
2133                   scanDoubleQuotedEscapeCharacter();
2134                 } catch (InvalidInputException ex) {
2135                 }
2136                 ;
2137               }
2138               //                try { // consume next character
2139               unicodeAsBackSlash = false;
2140               currentCharacter = source[currentPosition++];
2141               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2142               //                    && (source[currentPosition] == 'u')) {
2143               //                    getNextUnicodeChar();
2144               //                  } else {
2145               if (withoutUnicodePtr != 0) {
2146                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2147               }
2148               //                  }
2149               //                } catch (InvalidInputException ex) {
2150               //                };
2151             }
2152           } catch (IndexOutOfBoundsException e) {
2153             return;
2154           }
2155           break;
2156         case '/': {
2157           int test;
2158           if ((test = getNextChar('/', '*')) == 0) {
2159             //line comment
2160             try {
2161               //get the next char
2162               currentCharacter = source[currentPosition++];
2163               //                  if (((currentCharacter = source[currentPosition++]) ==
2164               // '\\')
2165               //                    && (source[currentPosition] == 'u')) {
2166               //                    //-------------unicode traitement ------------
2167               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2168               //                    currentPosition++;
2169               //                    while (source[currentPosition] == 'u') {
2170               //                      currentPosition++;
2171               //                    }
2172               //                    if ((c1 =
2173               //                      Character.getNumericValue(source[currentPosition++]))
2174               //                      > 15
2175               //                      || c1 < 0
2176               //                      || (c2 =
2177               //                        Character.getNumericValue(source[currentPosition++]))
2178               //                        > 15
2179               //                      || c2 < 0
2180               //                      || (c3 =
2181               //                        Character.getNumericValue(source[currentPosition++]))
2182               //                        > 15
2183               //                      || c3 < 0
2184               //                      || (c4 =
2185               //                        Character.getNumericValue(source[currentPosition++]))
2186               //                        > 15
2187               //                      || c4 < 0) {
2188               //                      //error don't care of the value
2189               //                      currentCharacter = 'A';
2190               //                    } //something different from \n and \r
2191               //                    else {
2192               //                      currentCharacter =
2193               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2194               //                    }
2195               //                  }
2196               while (currentCharacter != '\r' && currentCharacter != '\n') {
2197                 //get the next char
2198                 currentCharacter = source[currentPosition++];
2199                 //                    if (((currentCharacter = source[currentPosition++])
2200                 //                      == '\\')
2201                 //                      && (source[currentPosition] == 'u')) {
2202                 //                      //-------------unicode traitement ------------
2203                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2204                 //                      currentPosition++;
2205                 //                      while (source[currentPosition] == 'u') {
2206                 //                        currentPosition++;
2207                 //                      }
2208                 //                      if ((c1 =
2209                 //                        Character.getNumericValue(source[currentPosition++]))
2210                 //                        > 15
2211                 //                        || c1 < 0
2212                 //                        || (c2 =
2213                 //                          Character.getNumericValue(source[currentPosition++]))
2214                 //                          > 15
2215                 //                        || c2 < 0
2216                 //                        || (c3 =
2217                 //                          Character.getNumericValue(source[currentPosition++]))
2218                 //                          > 15
2219                 //                        || c3 < 0
2220                 //                        || (c4 =
2221                 //                          Character.getNumericValue(source[currentPosition++]))
2222                 //                          > 15
2223                 //                        || c4 < 0) {
2224                 //                        //error don't care of the value
2225                 //                        currentCharacter = 'A';
2226                 //                      } //something different from \n and \r
2227                 //                      else {
2228                 //                        currentCharacter =
2229                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2230                 //                      }
2231                 //                    }
2232               }
2233               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2234                 pushLineSeparator();
2235             } catch (IndexOutOfBoundsException e) {
2236             } //an eof will them be generated
2237             break;
2238           }
2239           if (test > 0) {
2240             //traditional and annotation comment
2241             boolean star = false;
2242             //                try { // consume next character
2243             unicodeAsBackSlash = false;
2244             currentCharacter = source[currentPosition++];
2245             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2246             //                    && (source[currentPosition] == 'u')) {
2247             //                    getNextUnicodeChar();
2248             //                  } else {
2249             if (withoutUnicodePtr != 0) {
2250               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2251             }
2252             //                  };
2253             //                } catch (InvalidInputException ex) {
2254             //                };
2255             if (currentCharacter == '*') {
2256               star = true;
2257             }
2258             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2259               pushLineSeparator();
2260             try { //get the next char
2261               currentCharacter = source[currentPosition++];
2262               //                  if (((currentCharacter = source[currentPosition++]) ==
2263               // '\\')
2264               //                    && (source[currentPosition] == 'u')) {
2265               //                    //-------------unicode traitement ------------
2266               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2267               //                    currentPosition++;
2268               //                    while (source[currentPosition] == 'u') {
2269               //                      currentPosition++;
2270               //                    }
2271               //                    if ((c1 =
2272               //                      Character.getNumericValue(source[currentPosition++]))
2273               //                      > 15
2274               //                      || c1 < 0
2275               //                      || (c2 =
2276               //                        Character.getNumericValue(source[currentPosition++]))
2277               //                        > 15
2278               //                      || c2 < 0
2279               //                      || (c3 =
2280               //                        Character.getNumericValue(source[currentPosition++]))
2281               //                        > 15
2282               //                      || c3 < 0
2283               //                      || (c4 =
2284               //                        Character.getNumericValue(source[currentPosition++]))
2285               //                        > 15
2286               //                      || c4 < 0) {
2287               //                      //error don't care of the value
2288               //                      currentCharacter = 'A';
2289               //                    } //something different from * and /
2290               //                    else {
2291               //                      currentCharacter =
2292               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2293               //                    }
2294               //                  }
2295               //loop until end of comment */
2296               while ((currentCharacter != '/') || (!star)) {
2297                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2298                   pushLineSeparator();
2299                 star = currentCharacter == '*';
2300                 //get next char
2301                 currentCharacter = source[currentPosition++];
2302                 //                    if (((currentCharacter = source[currentPosition++])
2303                 //                      == '\\')
2304                 //                      && (source[currentPosition] == 'u')) {
2305                 //                      //-------------unicode traitement ------------
2306                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2307                 //                      currentPosition++;
2308                 //                      while (source[currentPosition] == 'u') {
2309                 //                        currentPosition++;
2310                 //                      }
2311                 //                      if ((c1 =
2312                 //                        Character.getNumericValue(source[currentPosition++]))
2313                 //                        > 15
2314                 //                        || c1 < 0
2315                 //                        || (c2 =
2316                 //                          Character.getNumericValue(source[currentPosition++]))
2317                 //                          > 15
2318                 //                        || c2 < 0
2319                 //                        || (c3 =
2320                 //                          Character.getNumericValue(source[currentPosition++]))
2321                 //                          > 15
2322                 //                        || c3 < 0
2323                 //                        || (c4 =
2324                 //                          Character.getNumericValue(source[currentPosition++]))
2325                 //                          > 15
2326                 //                        || c4 < 0) {
2327                 //                        //error don't care of the value
2328                 //                        currentCharacter = 'A';
2329                 //                      } //something different from * and /
2330                 //                      else {
2331                 //                        currentCharacter =
2332                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2333                 //                      }
2334                 //                    }
2335               }
2336             } catch (IndexOutOfBoundsException e) {
2337               return;
2338             }
2339             break;
2340           }
2341           break;
2342         }
2343         default:
2344           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2345             try {
2346               scanIdentifierOrKeyword((currentCharacter == '$'));
2347             } catch (InvalidInputException ex) {
2348             }
2349             ;
2350             break;
2351           }
2352           if (Character.isDigit(currentCharacter)) {
2353             try {
2354               scanNumber(false);
2355             } catch (InvalidInputException ex) {
2356             }
2357             ;
2358             break;
2359           }
2360         }
2361       }
2362       //-----------------end switch while try--------------------
2363     } catch (IndexOutOfBoundsException e) {
2364     } catch (InvalidInputException e) {
2365     }
2366     return;
2367   }
2368
2369   //  public final boolean jumpOverUnicodeWhiteSpace()
2370   //    throws InvalidInputException {
2371   //    //BOOLEAN
2372   //    //handle the case of unicode. Jump over the next whiteSpace
2373   //    //making startPosition pointing on the next available char
2374   //    //On false, the currentCharacter is filled up with a potential
2375   //    //correct char
2376   //
2377   //    try {
2378   //      this.wasAcr = false;
2379   //      int c1, c2, c3, c4;
2380   //      int unicodeSize = 6;
2381   //      currentPosition++;
2382   //      while (source[currentPosition] == 'u') {
2383   //        currentPosition++;
2384   //        unicodeSize++;
2385   //      }
2386   //
2387   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2388   //        || c1 < 0)
2389   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2390   //          || c2 < 0)
2391   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2392   //          || c3 < 0)
2393   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2394   //          || c4 < 0)) {
2395   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2396   //      }
2397   //
2398   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2399   //      if (recordLineSeparator
2400   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2401   //        pushLineSeparator();
2402   //      if (Character.isWhitespace(currentCharacter))
2403   //        return true;
2404   //
2405   //      //buffer the new char which is not a white space
2406   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2407   //      //withoutUnicodePtr == 1 is true here
2408   //      return false;
2409   //    } catch (IndexOutOfBoundsException e) {
2410   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2411   //    }
2412   //  }
2413   public final int[] getLineEnds() {
2414     //return a bounded copy of this.lineEnds
2415     int[] copy;
2416     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2417     return copy;
2418   }
2419
2420   public char[] getSource() {
2421     return this.source;
2422   }
2423
2424   public static boolean isIdentifierOrKeyword(int token) {
2425     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2426   }
2427
2428   final char[] optimizedCurrentTokenSource1() {
2429     //return always the same char[] build only once
2430     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2431     char charOne = source[startPosition];
2432     switch (charOne) {
2433     case 'a':
2434       return charArray_a;
2435     case 'b':
2436       return charArray_b;
2437     case 'c':
2438       return charArray_c;
2439     case 'd':
2440       return charArray_d;
2441     case 'e':
2442       return charArray_e;
2443     case 'f':
2444       return charArray_f;
2445     case 'g':
2446       return charArray_g;
2447     case 'h':
2448       return charArray_h;
2449     case 'i':
2450       return charArray_i;
2451     case 'j':
2452       return charArray_j;
2453     case 'k':
2454       return charArray_k;
2455     case 'l':
2456       return charArray_l;
2457     case 'm':
2458       return charArray_m;
2459     case 'n':
2460       return charArray_n;
2461     case 'o':
2462       return charArray_o;
2463     case 'p':
2464       return charArray_p;
2465     case 'q':
2466       return charArray_q;
2467     case 'r':
2468       return charArray_r;
2469     case 's':
2470       return charArray_s;
2471     case 't':
2472       return charArray_t;
2473     case 'u':
2474       return charArray_u;
2475     case 'v':
2476       return charArray_v;
2477     case 'w':
2478       return charArray_w;
2479     case 'x':
2480       return charArray_x;
2481     case 'y':
2482       return charArray_y;
2483     case 'z':
2484       return charArray_z;
2485     default:
2486       return new char[] { charOne };
2487     }
2488   }
2489
2490   final char[] optimizedCurrentTokenSource2() {
2491     char c0, c1;
2492     c0 = source[startPosition];
2493     c1 = source[startPosition + 1];
2494     if (c0 == '$') {
2495       //return always the same char[] build only once
2496       //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2497       switch (c1) {
2498       case 'a':
2499         return charArray_va;
2500       case 'b':
2501         return charArray_vb;
2502       case 'c':
2503         return charArray_vc;
2504       case 'd':
2505         return charArray_vd;
2506       case 'e':
2507         return charArray_ve;
2508       case 'f':
2509         return charArray_vf;
2510       case 'g':
2511         return charArray_vg;
2512       case 'h':
2513         return charArray_vh;
2514       case 'i':
2515         return charArray_vi;
2516       case 'j':
2517         return charArray_vj;
2518       case 'k':
2519         return charArray_vk;
2520       case 'l':
2521         return charArray_vl;
2522       case 'm':
2523         return charArray_vm;
2524       case 'n':
2525         return charArray_vn;
2526       case 'o':
2527         return charArray_vo;
2528       case 'p':
2529         return charArray_vp;
2530       case 'q':
2531         return charArray_vq;
2532       case 'r':
2533         return charArray_vr;
2534       case 's':
2535         return charArray_vs;
2536       case 't':
2537         return charArray_vt;
2538       case 'u':
2539         return charArray_vu;
2540       case 'v':
2541         return charArray_vv;
2542       case 'w':
2543         return charArray_vw;
2544       case 'x':
2545         return charArray_vx;
2546       case 'y':
2547         return charArray_vy;
2548       case 'z':
2549         return charArray_vz;
2550       }
2551     }
2552     //try to return the same char[] build only once
2553     int hash = ((c0 << 6) + c1) % TableSize;
2554     char[][] table = charArray_length[0][hash];
2555     int i = newEntry2;
2556     while (++i < InternalTableSize) {
2557       char[] charArray = table[i];
2558       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2559         return charArray;
2560     }
2561     //---------other side---------
2562     i = -1;
2563     int max = newEntry2;
2564     while (++i <= max) {
2565       char[] charArray = table[i];
2566       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2567         return charArray;
2568     }
2569     //--------add the entry-------
2570     if (++max >= InternalTableSize)
2571       max = 0;
2572     char[] r;
2573     table[max] = (r = new char[] { c0, c1 });
2574     newEntry2 = max;
2575     return r;
2576   }
2577
2578   final char[] optimizedCurrentTokenSource3() {
2579     //try to return the same char[] build only once
2580     char c0, c1, c2;
2581     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2582         % TableSize;
2583     char[][] table = charArray_length[1][hash];
2584     int i = newEntry3;
2585     while (++i < InternalTableSize) {
2586       char[] charArray = table[i];
2587       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2588         return charArray;
2589     }
2590     //---------other side---------
2591     i = -1;
2592     int max = newEntry3;
2593     while (++i <= max) {
2594       char[] charArray = table[i];
2595       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2596         return charArray;
2597     }
2598     //--------add the entry-------
2599     if (++max >= InternalTableSize)
2600       max = 0;
2601     char[] r;
2602     table[max] = (r = new char[] { c0, c1, c2 });
2603     newEntry3 = max;
2604     return r;
2605   }
2606
2607   final char[] optimizedCurrentTokenSource4() {
2608     //try to return the same char[] build only once
2609     char c0, c1, c2, c3;
2610     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2611         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2612         % TableSize;
2613     char[][] table = charArray_length[2][(int) hash];
2614     int i = newEntry4;
2615     while (++i < InternalTableSize) {
2616       char[] charArray = table[i];
2617       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2618         return charArray;
2619     }
2620     //---------other side---------
2621     i = -1;
2622     int max = newEntry4;
2623     while (++i <= max) {
2624       char[] charArray = table[i];
2625       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2626         return charArray;
2627     }
2628     //--------add the entry-------
2629     if (++max >= InternalTableSize)
2630       max = 0;
2631     char[] r;
2632     table[max] = (r = new char[] { c0, c1, c2, c3 });
2633     newEntry4 = max;
2634     return r;
2635   }
2636
2637   final char[] optimizedCurrentTokenSource5() {
2638     //try to return the same char[] build only once
2639     char c0, c1, c2, c3, c4;
2640     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2641         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2642         % TableSize;
2643     char[][] table = charArray_length[3][(int) hash];
2644     int i = newEntry5;
2645     while (++i < InternalTableSize) {
2646       char[] charArray = table[i];
2647       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2648         return charArray;
2649     }
2650     //---------other side---------
2651     i = -1;
2652     int max = newEntry5;
2653     while (++i <= max) {
2654       char[] charArray = table[i];
2655       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2656         return charArray;
2657     }
2658     //--------add the entry-------
2659     if (++max >= InternalTableSize)
2660       max = 0;
2661     char[] r;
2662     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2663     newEntry5 = max;
2664     return r;
2665   }
2666
2667   final char[] optimizedCurrentTokenSource6() {
2668     //try to return the same char[] build only once
2669     char c0, c1, c2, c3, c4, c5;
2670     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2671         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2672         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2673         % TableSize;
2674     char[][] table = charArray_length[4][(int) hash];
2675     int i = newEntry6;
2676     while (++i < InternalTableSize) {
2677       char[] charArray = table[i];
2678       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2679           && (c5 == charArray[5]))
2680         return charArray;
2681     }
2682     //---------other side---------
2683     i = -1;
2684     int max = newEntry6;
2685     while (++i <= max) {
2686       char[] charArray = table[i];
2687       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2688           && (c5 == charArray[5]))
2689         return charArray;
2690     }
2691     //--------add the entry-------
2692     if (++max >= InternalTableSize)
2693       max = 0;
2694     char[] r;
2695     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2696     newEntry6 = max;
2697     return r;
2698   }
2699
2700   public final void pushLineSeparator() throws InvalidInputException {
2701     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2702     final int INCREMENT = 250;
2703     if (this.checkNonExternalizedStringLiterals) {
2704       // reinitialize the current line for non externalize strings purpose
2705       currentLine = null;
2706     }
2707     //currentCharacter is at position currentPosition-1
2708     // cr 000D
2709     if (currentCharacter == '\r') {
2710       int separatorPos = currentPosition - 1;
2711       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2712         return;
2713       //System.out.println("CR-" + separatorPos);
2714       try {
2715         lineEnds[++linePtr] = separatorPos;
2716       } catch (IndexOutOfBoundsException e) {
2717         //linePtr value is correct
2718         int oldLength = lineEnds.length;
2719         int[] old = lineEnds;
2720         lineEnds = new int[oldLength + INCREMENT];
2721         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2722         lineEnds[linePtr] = separatorPos;
2723       }
2724       // look-ahead for merged cr+lf
2725       try {
2726         if (source[currentPosition] == '\n') {
2727           //System.out.println("look-ahead LF-" + currentPosition);
2728           lineEnds[linePtr] = currentPosition;
2729           currentPosition++;
2730           wasAcr = false;
2731         } else {
2732           wasAcr = true;
2733         }
2734       } catch (IndexOutOfBoundsException e) {
2735         wasAcr = true;
2736       }
2737     } else {
2738       // lf 000A
2739       if (currentCharacter == '\n') {
2740         //must merge eventual cr followed by lf
2741         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2742           //System.out.println("merge LF-" + (currentPosition - 1));
2743           lineEnds[linePtr] = currentPosition - 1;
2744         } else {
2745           int separatorPos = currentPosition - 1;
2746           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2747             return;
2748           // System.out.println("LF-" + separatorPos);
2749           try {
2750             lineEnds[++linePtr] = separatorPos;
2751           } catch (IndexOutOfBoundsException e) {
2752             //linePtr value is correct
2753             int oldLength = lineEnds.length;
2754             int[] old = lineEnds;
2755             lineEnds = new int[oldLength + INCREMENT];
2756             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2757             lineEnds[linePtr] = separatorPos;
2758           }
2759         }
2760         wasAcr = false;
2761       }
2762     }
2763   }
2764
2765   public final void pushUnicodeLineSeparator() {
2766     // isUnicode means that the \r or \n has been read as a unicode character
2767     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2768     final int INCREMENT = 250;
2769     //currentCharacter is at position currentPosition-1
2770     if (this.checkNonExternalizedStringLiterals) {
2771       // reinitialize the current line for non externalize strings purpose
2772       currentLine = null;
2773     }
2774     // cr 000D
2775     if (currentCharacter == '\r') {
2776       int separatorPos = currentPosition - 6;
2777       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2778         return;
2779       //System.out.println("CR-" + separatorPos);
2780       try {
2781         lineEnds[++linePtr] = separatorPos;
2782       } catch (IndexOutOfBoundsException e) {
2783         //linePtr value is correct
2784         int oldLength = lineEnds.length;
2785         int[] old = lineEnds;
2786         lineEnds = new int[oldLength + INCREMENT];
2787         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2788         lineEnds[linePtr] = separatorPos;
2789       }
2790       // look-ahead for merged cr+lf
2791       if (source[currentPosition] == '\n') {
2792         //System.out.println("look-ahead LF-" + currentPosition);
2793         lineEnds[linePtr] = currentPosition;
2794         currentPosition++;
2795         wasAcr = false;
2796       } else {
2797         wasAcr = true;
2798       }
2799     } else {
2800       // lf 000A
2801       if (currentCharacter == '\n') {
2802         //must merge eventual cr followed by lf
2803         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2804           //System.out.println("merge LF-" + (currentPosition - 1));
2805           lineEnds[linePtr] = currentPosition - 6;
2806         } else {
2807           int separatorPos = currentPosition - 6;
2808           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2809             return;
2810           // System.out.println("LF-" + separatorPos);
2811           try {
2812             lineEnds[++linePtr] = separatorPos;
2813           } catch (IndexOutOfBoundsException e) {
2814             //linePtr value is correct
2815             int oldLength = lineEnds.length;
2816             int[] old = lineEnds;
2817             lineEnds = new int[oldLength + INCREMENT];
2818             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2819             lineEnds[linePtr] = separatorPos;
2820           }
2821         }
2822         wasAcr = false;
2823       }
2824     }
2825   }
2826
2827   public void recordComment(int token) {
2828     // compute position
2829     int stopPosition = this.currentPosition;
2830     switch (token) {
2831     case TokenNameCOMMENT_LINE:
2832       stopPosition = -this.lastCommentLinePosition;
2833       break;
2834     case TokenNameCOMMENT_BLOCK:
2835       stopPosition = -this.currentPosition;
2836       break;
2837     }
2838
2839     // a new comment is recorded
2840     int length = this.commentStops.length;
2841     if (++this.commentPtr >= length) {
2842       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2843       //grows the positions buffers too
2844       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2845     }
2846     this.commentStops[this.commentPtr] = stopPosition;
2847     this.commentStarts[this.commentPtr] = this.startPosition;
2848   }
2849
2850   //  public final void recordComment(boolean isJavadoc) {
2851   //    // a new annotation comment is recorded
2852   //    try {
2853   //      commentStops[++commentPtr] = isJavadoc
2854   //          ? currentPosition
2855   //          : -currentPosition;
2856   //    } catch (IndexOutOfBoundsException e) {
2857   //      int oldStackLength = commentStops.length;
2858   //      int[] oldStack = commentStops;
2859   //      commentStops = new int[oldStackLength + 30];
2860   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2861   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2862   //      //grows the positions buffers too
2863   //      int[] old = commentStarts;
2864   //      commentStarts = new int[oldStackLength + 30];
2865   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2866   //    }
2867   //    //the buffer is of a correct size here
2868   //    commentStarts[commentPtr] = startPosition;
2869   //  }
2870   public void resetTo(int begin, int end) {
2871     //reset the scanner to a given position where it may rescan again
2872     diet = false;
2873     initialPosition = startPosition = currentPosition = begin;
2874     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2875     commentPtr = -1; // reset comment stack
2876   }
2877
2878   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2879     // the string with "\\u" is a legal string of two chars \ and u
2880     //thus we use a direct access to the source (for regular cases).
2881     //    if (unicodeAsBackSlash) {
2882     //      // consume next character
2883     //      unicodeAsBackSlash = false;
2884     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2885     //        && (source[currentPosition] == 'u')) {
2886     //        getNextUnicodeChar();
2887     //      } else {
2888     //        if (withoutUnicodePtr != 0) {
2889     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2890     //        }
2891     //      }
2892     //    } else
2893     currentCharacter = source[currentPosition++];
2894     switch (currentCharacter) {
2895     case '\'':
2896       currentCharacter = '\'';
2897       break;
2898     case '\\':
2899       currentCharacter = '\\';
2900       break;
2901     default:
2902       currentCharacter = '\\';
2903       currentPosition--;
2904     }
2905   }
2906
2907   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2908     // the string with "\\u" is a legal string of two chars \ and u
2909     //thus we use a direct access to the source (for regular cases).
2910     //    if (unicodeAsBackSlash) {
2911     //      // consume next character
2912     //      unicodeAsBackSlash = false;
2913     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2914     //        && (source[currentPosition] == 'u')) {
2915     //        getNextUnicodeChar();
2916     //      } else {
2917     //        if (withoutUnicodePtr != 0) {
2918     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2919     //        }
2920     //      }
2921     //    } else
2922     currentCharacter = source[currentPosition++];
2923     switch (currentCharacter) {
2924     //      case 'b' :
2925     //        currentCharacter = '\b';
2926     //        break;
2927     case 't':
2928       currentCharacter = '\t';
2929       break;
2930     case 'n':
2931       currentCharacter = '\n';
2932       break;
2933     //      case 'f' :
2934     //        currentCharacter = '\f';
2935     //        break;
2936     case 'r':
2937       currentCharacter = '\r';
2938       break;
2939     case '\"':
2940       currentCharacter = '\"';
2941       break;
2942     case '\'':
2943       currentCharacter = '\'';
2944       break;
2945     case '\\':
2946       currentCharacter = '\\';
2947       break;
2948     case '$':
2949       currentCharacter = '$';
2950       break;
2951     default:
2952       // -----------octal escape--------------
2953       // OctalDigit
2954       // OctalDigit OctalDigit
2955       // ZeroToThree OctalDigit OctalDigit
2956       int number = Character.getNumericValue(currentCharacter);
2957       if (number >= 0 && number <= 7) {
2958         boolean zeroToThreeNot = number > 3;
2959         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2960           int digit = Character.getNumericValue(currentCharacter);
2961           if (digit >= 0 && digit <= 7) {
2962             number = (number * 8) + digit;
2963             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2964               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2965                 // Digit --> ignore last character
2966                 currentPosition--;
2967               } else {
2968                 digit = Character.getNumericValue(currentCharacter);
2969                 if (digit >= 0 && digit <= 7) {
2970                   // has read \ZeroToThree OctalDigit OctalDigit
2971                   number = (number * 8) + digit;
2972                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2973                   // --> ignore last character
2974                   currentPosition--;
2975                 }
2976               }
2977             } else { // has read \OctalDigit NonDigit--> ignore last
2978               // character
2979               currentPosition--;
2980             }
2981           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2982             // character
2983             currentPosition--;
2984           }
2985         } else { // has read \OctalDigit --> ignore last character
2986           currentPosition--;
2987         }
2988         if (number > 255)
2989           throw new InvalidInputException(INVALID_ESCAPE);
2990         currentCharacter = (char) number;
2991       }
2992     //else
2993     //     throw new InvalidInputException(INVALID_ESCAPE);
2994     }
2995   }
2996
2997   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2998   //    return scanIdentifierOrKeyword( false );
2999   //  }
3000   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3001     //test keywords
3002     //first dispatch on the first char.
3003     //then the length. If there are several
3004     //keywors with the same length AND the same first char, then do another
3005     //disptach on the second char :-)...cool....but fast !
3006     useAssertAsAnIndentifier = false;
3007     while (getNextCharAsJavaIdentifierPart()) {
3008     }
3009     ;
3010     if (isVariable) {
3011       //      if (new String(getCurrentTokenSource()).equals("$this")) {
3012       //        return TokenNamethis;
3013       //      }
3014       return TokenNameVariable;
3015     }
3016     int index, length;
3017     char[] data;
3018     char firstLetter;
3019     //    if (withoutUnicodePtr == 0)
3020     //quick test on length == 1 but not on length > 12 while most identifier
3021     //have a length which is <= 12...but there are lots of identifier with
3022     //only one char....
3023     //      {
3024     if ((length = currentPosition - startPosition) == 1)
3025       return TokenNameIdentifier;
3026     //  data = source;
3027     data = new char[length];
3028     index = startPosition;
3029     for (int i = 0; i < length; i++) {
3030       data[i] = Character.toLowerCase(source[index + i]);
3031     }
3032     index = 0;
3033     //    } else {
3034     //      if ((length = withoutUnicodePtr) == 1)
3035     //        return TokenNameIdentifier;
3036     //      // data = withoutUnicodeBuffer;
3037     //      data = new char[withoutUnicodeBuffer.length];
3038     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3039     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3040     //      }
3041     //      index = 1;
3042     //    }
3043     firstLetter = data[index];
3044     switch (firstLetter) {
3045     case '_':
3046       switch (length) {
3047       case 8:
3048         //__FILE__
3049         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3050             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3051           return TokenNameFILE;
3052         index = 0; //__LINE__
3053         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3054             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3055           return TokenNameLINE;
3056         break;
3057       case 9:
3058         //__CLASS__
3059         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3060             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3061           return TokenNameCLASS_C;
3062         break;
3063       case 11:
3064         //__METHOD__
3065         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3066             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3067             && (data[++index] == '_'))
3068           return TokenNameMETHOD_C;
3069         break;
3070       case 12:
3071         //__FUNCTION__
3072         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3073             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3074             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3075           return TokenNameFUNC_C;
3076         break;
3077       }
3078       return TokenNameIdentifier;
3079     case 'a':
3080       // as and array abstract
3081       switch (length) {
3082       case 2:
3083         //as
3084         if ((data[++index] == 's')) {
3085           return TokenNameas;
3086         } else {
3087           return TokenNameIdentifier;
3088         }
3089       case 3:
3090         //and
3091         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3092           return TokenNameand;
3093         } else {
3094           return TokenNameIdentifier;
3095         }
3096       case 5:
3097         // array
3098         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3099           return TokenNamearray;
3100         else
3101           return TokenNameIdentifier;
3102       case 8:
3103         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3104             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3105           return TokenNameabstract;
3106         else
3107           return TokenNameIdentifier;
3108       default:
3109         return TokenNameIdentifier;
3110       }
3111     case 'b':
3112       //break
3113       switch (length) {
3114       case 5:
3115         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3116           return TokenNamebreak;
3117         else
3118           return TokenNameIdentifier;
3119       default:
3120         return TokenNameIdentifier;
3121       }
3122     case 'c':
3123       //case catch class clone const continue
3124       switch (length) {
3125       case 4:
3126         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3127           return TokenNamecase;
3128         else
3129           return TokenNameIdentifier;
3130       case 5:
3131         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3132           return TokenNamecatch;
3133         index = 0;
3134         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3135           return TokenNameclass;
3136         index = 0;
3137         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3138           return TokenNameclone;
3139         index = 0;
3140         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3141           return TokenNameconst;
3142         else
3143           return TokenNameIdentifier;
3144       case 8:
3145         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3146             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3147           return TokenNamecontinue;
3148         else
3149           return TokenNameIdentifier;
3150       default:
3151         return TokenNameIdentifier;
3152       }
3153     case 'd':
3154       // declare default do die
3155       // TODO delete define ==> no keyword !
3156       switch (length) {
3157       case 2:
3158         if ((data[++index] == 'o'))
3159           return TokenNamedo;
3160         else
3161           return TokenNameIdentifier;
3162       //          case 6 :
3163       //            if ((data[++index] == 'e')
3164       //              && (data[++index] == 'f')
3165       //              && (data[++index] == 'i')
3166       //              && (data[++index] == 'n')
3167       //              && (data[++index] == 'e'))
3168       //              return TokenNamedefine;
3169       //            else
3170       //              return TokenNameIdentifier;
3171       case 7:
3172         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3173             && (data[++index] == 'r') && (data[++index] == 'e'))
3174           return TokenNamedeclare;
3175         index = 0;
3176         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3177             && (data[++index] == 'l') && (data[++index] == 't'))
3178           return TokenNamedefault;
3179         else
3180           return TokenNameIdentifier;
3181       default:
3182         return TokenNameIdentifier;
3183       }
3184     case 'e':
3185       //echo else exit elseif extends eval
3186       switch (length) {
3187       case 4:
3188         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3189           return TokenNameecho;
3190         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3191           return TokenNameelse;
3192         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3193           return TokenNameexit;
3194         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3195           return TokenNameeval;
3196         else
3197           return TokenNameIdentifier;
3198       case 5:
3199         // endif empty
3200         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3201           return TokenNameendif;
3202         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3203           return TokenNameempty;
3204         else
3205           return TokenNameIdentifier;
3206       case 6:
3207         // endfor
3208         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3209             && (data[++index] == 'r'))
3210           return TokenNameendfor;
3211         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3212             && (data[++index] == 'f'))
3213           return TokenNameelseif;
3214         else
3215           return TokenNameIdentifier;
3216       case 7:
3217         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3218             && (data[++index] == 'd') && (data[++index] == 's'))
3219           return TokenNameextends;
3220         else
3221           return TokenNameIdentifier;
3222       case 8:
3223         // endwhile
3224         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3225             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3226           return TokenNameendwhile;
3227         else
3228           return TokenNameIdentifier;
3229       case 9:
3230         // endswitch
3231         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3232             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3233           return TokenNameendswitch;
3234         else
3235           return TokenNameIdentifier;
3236       case 10:
3237         // enddeclare
3238         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3239             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3240             && (data[++index] == 'e'))
3241           return TokenNameenddeclare;
3242         index = 0;
3243         if ((data[++index] == 'n') // endforeach
3244             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3245             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3246           return TokenNameendforeach;
3247         else
3248           return TokenNameIdentifier;
3249       default:
3250         return TokenNameIdentifier;
3251       }
3252     case 'f':
3253       //for false final function
3254       switch (length) {
3255       case 3:
3256         if ((data[++index] == 'o') && (data[++index] == 'r'))
3257           return TokenNamefor;
3258         else
3259           return TokenNameIdentifier;
3260       case 5:
3261         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3262         //                && (data[++index] == 's') && (data[++index] == 'e'))
3263         //              return TokenNamefalse;
3264         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3265           return TokenNamefinal;
3266         else
3267           return TokenNameIdentifier;
3268       case 7:
3269         // foreach
3270         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3271             && (data[++index] == 'c') && (data[++index] == 'h'))
3272           return TokenNameforeach;
3273         else
3274           return TokenNameIdentifier;
3275       case 8:
3276         // function
3277         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3278             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3279           return TokenNamefunction;
3280         else
3281           return TokenNameIdentifier;
3282       default:
3283         return TokenNameIdentifier;
3284       }
3285     case 'g':
3286       //global
3287       if (length == 6) {
3288         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3289             && (data[++index] == 'l')) {
3290           return TokenNameglobal;
3291         }
3292       }
3293       return TokenNameIdentifier;
3294     case 'i':
3295       //if int isset include include_once instanceof interface implements
3296       switch (length) {
3297       case 2:
3298         if (data[++index] == 'f')
3299           return TokenNameif;
3300         else
3301           return TokenNameIdentifier;
3302       //          case 3 :
3303       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3304       //              return TokenNameint;
3305       //            else
3306       //              return TokenNameIdentifier;
3307       case 5:
3308         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3309           return TokenNameisset;
3310         else
3311           return TokenNameIdentifier;
3312       case 7:
3313         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3314             && (data[++index] == 'd') && (data[++index] == 'e'))
3315           return TokenNameinclude;
3316         else
3317           return TokenNameIdentifier;
3318       case 9:
3319         // interface
3320         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3321             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3322           return TokenNameinterface;
3323         else
3324           return TokenNameIdentifier;
3325       case 10:
3326         // instanceof
3327         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3328             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3329             && (data[++index] == 'f'))
3330           return TokenNameinstanceof;
3331         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3332             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3333             && (data[++index] == 's'))
3334           return TokenNameimplements;
3335         else
3336           return TokenNameIdentifier;
3337       case 12:
3338         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3339             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3340             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3341           return TokenNameinclude_once;
3342         else
3343           return TokenNameIdentifier;
3344       default:
3345         return TokenNameIdentifier;
3346       }
3347     case 'l':
3348       //list
3349       if (length == 4) {
3350         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3351           return TokenNamelist;
3352         }
3353       }
3354       return TokenNameIdentifier;
3355     case 'n':
3356       // new null
3357       switch (length) {
3358       case 3:
3359         if ((data[++index] == 'e') && (data[++index] == 'w'))
3360           return TokenNamenew;
3361         else
3362           return TokenNameIdentifier;
3363       //          case 4 :
3364       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3365       //                && (data[++index] == 'l'))
3366       //              return TokenNamenull;
3367       //            else
3368       //              return TokenNameIdentifier;
3369       default:
3370         return TokenNameIdentifier;
3371       }
3372     case 'o':
3373       // or old_function
3374       if (length == 2) {
3375         if (data[++index] == 'r') {
3376           return TokenNameor;
3377         }
3378       }
3379       //        if (length == 12) {
3380       //          if ((data[++index] == 'l')
3381       //            && (data[++index] == 'd')
3382       //            && (data[++index] == '_')
3383       //            && (data[++index] == 'f')
3384       //            && (data[++index] == 'u')
3385       //            && (data[++index] == 'n')
3386       //            && (data[++index] == 'c')
3387       //            && (data[++index] == 't')
3388       //            && (data[++index] == 'i')
3389       //            && (data[++index] == 'o')
3390       //            && (data[++index] == 'n')) {
3391       //            return TokenNameold_function;
3392       //          }
3393       //        }
3394       return TokenNameIdentifier;
3395     case 'p':
3396       // print public private protected
3397       switch (length) {
3398       case 5:
3399         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3400           return TokenNameprint;
3401         } else
3402           return TokenNameIdentifier;
3403       case 6:
3404         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3405             && (data[++index] == 'c')) {
3406           return TokenNamepublic;
3407         } else
3408           return TokenNameIdentifier;
3409       case 7:
3410         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3411             && (data[++index] == 't') && (data[++index] == 'e')) {
3412           return TokenNameprivate;
3413         } else
3414           return TokenNameIdentifier;
3415       case 9:
3416         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3417             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3418           return TokenNameprotected;
3419         } else
3420           return TokenNameIdentifier;
3421       }
3422       return TokenNameIdentifier;
3423     case 'r':
3424       //return require require_once
3425       if (length == 6) {
3426         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3427             && (data[++index] == 'n')) {
3428           return TokenNamereturn;
3429         }
3430       } else if (length == 7) {
3431         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3432             && (data[++index] == 'r') && (data[++index] == 'e')) {
3433           return TokenNamerequire;
3434         }
3435       } else if (length == 12) {
3436         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3437             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3438             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3439           return TokenNamerequire_once;
3440         }
3441       } else
3442         return TokenNameIdentifier;
3443     case 's':
3444       //static switch
3445       switch (length) {
3446       case 6:
3447         if (data[++index] == 't')
3448           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3449             return TokenNamestatic;
3450           } else
3451             return TokenNameIdentifier;
3452         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3453             && (data[++index] == 'h'))
3454           return TokenNameswitch;
3455         else
3456           return TokenNameIdentifier;
3457       default:
3458         return TokenNameIdentifier;
3459       }
3460     case 't':
3461       // try true throw
3462       switch (length) {
3463       case 3:
3464         if ((data[++index] == 'r') && (data[++index] == 'y'))
3465           return TokenNametry;
3466         else
3467           return TokenNameIdentifier;
3468       //          case 4 :
3469       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3470       //                && (data[++index] == 'e'))
3471       //              return TokenNametrue;
3472       //            else
3473       //              return TokenNameIdentifier;
3474       case 5:
3475         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3476           return TokenNamethrow;
3477         else
3478           return TokenNameIdentifier;
3479       default:
3480         return TokenNameIdentifier;
3481       }
3482     case 'u':
3483       //use unset
3484       switch (length) {
3485       case 3:
3486         if ((data[++index] == 's') && (data[++index] == 'e'))
3487           return TokenNameuse;
3488         else
3489           return TokenNameIdentifier;
3490       case 5:
3491         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3492           return TokenNameunset;
3493         else
3494           return TokenNameIdentifier;
3495       default:
3496         return TokenNameIdentifier;
3497       }
3498     case 'v':
3499       //var
3500       switch (length) {
3501       case 3:
3502         if ((data[++index] == 'a') && (data[++index] == 'r'))
3503           return TokenNamevar;
3504         else
3505           return TokenNameIdentifier;
3506       default:
3507         return TokenNameIdentifier;
3508       }
3509     case 'w':
3510       //while
3511       switch (length) {
3512       case 5:
3513         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3514           return TokenNamewhile;
3515         else
3516           return TokenNameIdentifier;
3517       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3518       // (data[++index]=='e') && (data[++index]=='f')&&
3519       // (data[++index]=='p'))
3520       //return TokenNamewidefp ;
3521       //else
3522       //return TokenNameIdentifier;
3523       default:
3524         return TokenNameIdentifier;
3525       }
3526     case 'x':
3527       //xor
3528       switch (length) {
3529       case 3:
3530         if ((data[++index] == 'o') && (data[++index] == 'r'))
3531           return TokenNamexor;
3532         else
3533           return TokenNameIdentifier;
3534       default:
3535         return TokenNameIdentifier;
3536       }
3537     default:
3538       return TokenNameIdentifier;
3539     }
3540   }
3541
3542   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3543     //when entering this method the currentCharacter is the firt
3544     //digit of the number , i.e. it may be preceeded by a . when
3545     //dotPrefix is true
3546     boolean floating = dotPrefix;
3547     if ((!dotPrefix) && (currentCharacter == '0')) {
3548       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3549         //force the first char of the hexa number do exist...
3550         // consume next character
3551         unicodeAsBackSlash = false;
3552         currentCharacter = source[currentPosition++];
3553         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3554         //          && (source[currentPosition] == 'u')) {
3555         //          getNextUnicodeChar();
3556         //        } else {
3557         //          if (withoutUnicodePtr != 0) {
3558         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3559         //          }
3560         //        }
3561         if (Character.digit(currentCharacter, 16) == -1)
3562           throw new InvalidInputException(INVALID_HEXA);
3563         //---end forcing--
3564         while (getNextCharAsDigit(16)) {
3565         }
3566         ;
3567         //        if (getNextChar('l', 'L') >= 0)
3568         //          return TokenNameLongLiteral;
3569         //        else
3570         return TokenNameIntegerLiteral;
3571       }
3572       //there is x or X in the number
3573       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3574       // 00078.0 is true !!!!! crazy language
3575       if (getNextCharAsDigit()) {
3576         //-------------potential octal-----------------
3577         while (getNextCharAsDigit()) {
3578         }
3579         ;
3580         //        if (getNextChar('l', 'L') >= 0) {
3581         //          return TokenNameLongLiteral;
3582         //        }
3583         //
3584         //        if (getNextChar('f', 'F') >= 0) {
3585         //          return TokenNameFloatingPointLiteral;
3586         //        }
3587         if (getNextChar('d', 'D') >= 0) {
3588           return TokenNameDoubleLiteral;
3589         } else { //make the distinction between octal and float ....
3590           if (getNextChar('.')) { //bingo ! ....
3591             while (getNextCharAsDigit()) {
3592             }
3593             ;
3594             if (getNextChar('e', 'E') >= 0) {
3595               // consume next character
3596               unicodeAsBackSlash = false;
3597               currentCharacter = source[currentPosition++];
3598               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3599               //                && (source[currentPosition] == 'u')) {
3600               //                getNextUnicodeChar();
3601               //              } else {
3602               //                if (withoutUnicodePtr != 0) {
3603               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3604               //                }
3605               //              }
3606               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3607                 // consume next character
3608                 unicodeAsBackSlash = false;
3609                 currentCharacter = source[currentPosition++];
3610                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3611                 //                  && (source[currentPosition] == 'u')) {
3612                 //                  getNextUnicodeChar();
3613                 //                } else {
3614                 //                  if (withoutUnicodePtr != 0) {
3615                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3616                 //                      currentCharacter;
3617                 //                  }
3618                 //                }
3619               }
3620               if (!Character.isDigit(currentCharacter))
3621                 throw new InvalidInputException(INVALID_FLOAT);
3622               while (getNextCharAsDigit()) {
3623               }
3624               ;
3625             }
3626             //            if (getNextChar('f', 'F') >= 0)
3627             //              return TokenNameFloatingPointLiteral;
3628             getNextChar('d', 'D'); //jump over potential d or D
3629             return TokenNameDoubleLiteral;
3630           } else {
3631             return TokenNameIntegerLiteral;
3632           }
3633         }
3634       } else {
3635         /* carry on */
3636       }
3637     }
3638     while (getNextCharAsDigit()) {
3639     }
3640     ;
3641     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3642     //      return TokenNameLongLiteral;
3643     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3644       while (getNextCharAsDigit()) {
3645       }
3646       ;
3647       floating = true;
3648     }
3649     //if floating is true both exponant and suffix may be optional
3650     if (getNextChar('e', 'E') >= 0) {
3651       floating = true;
3652       // consume next character
3653       unicodeAsBackSlash = false;
3654       currentCharacter = source[currentPosition++];
3655       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3656       //        && (source[currentPosition] == 'u')) {
3657       //        getNextUnicodeChar();
3658       //      } else {
3659       //        if (withoutUnicodePtr != 0) {
3660       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3661       //        }
3662       //      }
3663       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3664         // next
3665         // character
3666         unicodeAsBackSlash = false;
3667         currentCharacter = source[currentPosition++];
3668         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3669         //          && (source[currentPosition] == 'u')) {
3670         //          getNextUnicodeChar();
3671         //        } else {
3672         //          if (withoutUnicodePtr != 0) {
3673         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3674         //          }
3675         //        }
3676       }
3677       if (!Character.isDigit(currentCharacter))
3678         throw new InvalidInputException(INVALID_FLOAT);
3679       while (getNextCharAsDigit()) {
3680       }
3681       ;
3682     }
3683     if (getNextChar('d', 'D') >= 0)
3684       return TokenNameDoubleLiteral;
3685     //    if (getNextChar('f', 'F') >= 0)
3686     //      return TokenNameFloatingPointLiteral;
3687     //the long flag has been tested before
3688     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3689   }
3690
3691   /**
3692    * Search the line number corresponding to a specific position
3693    *
3694    */
3695   public final int getLineNumber(int position) {
3696     if (lineEnds == null)
3697       return 1;
3698     int length = linePtr + 1;
3699     if (length == 0)
3700       return 1;
3701     int g = 0, d = length - 1;
3702     int m = 0;
3703     while (g <= d) {
3704       m = (g + d) / 2;
3705       if (position < lineEnds[m]) {
3706         d = m - 1;
3707       } else if (position > lineEnds[m]) {
3708         g = m + 1;
3709       } else {
3710         return m + 1;
3711       }
3712     }
3713     if (position < lineEnds[m]) {
3714       return m + 1;
3715     }
3716     return m + 2;
3717   }
3718
3719   public void setPHPMode(boolean mode) {
3720     phpMode = mode;
3721   }
3722
3723   public final void setSource(char[] source) {
3724     setSource(null, source);
3725   }
3726
3727   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3728     //the source-buffer is set to sourceString
3729     this.compilationUnit = compilationUnit;
3730     if (source == null) {
3731       this.source = new char[0];
3732     } else {
3733       this.source = source;
3734     }
3735     startPosition = -1;
3736     initialPosition = currentPosition = 0;
3737     containsAssertKeyword = false;
3738     withoutUnicodeBuffer = new char[this.source.length];
3739     encapsedStringStack = new Stack();
3740   }
3741
3742   public String toString() {
3743     if (startPosition == source.length)
3744       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3745     if (currentPosition > source.length)
3746       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3747     char front[] = new char[startPosition];
3748     System.arraycopy(source, 0, front, 0, startPosition);
3749     int middleLength = (currentPosition - 1) - startPosition + 1;
3750     char middle[];
3751     if (middleLength > -1) {
3752       middle = new char[middleLength];
3753       System.arraycopy(source, startPosition, middle, 0, middleLength);
3754     } else {
3755       middle = new char[0];
3756     }
3757     char end[] = new char[source.length - (currentPosition - 1)];
3758     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3759     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3760         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3761         + new String(end);
3762   }
3763
3764   public final String toStringAction(int act) {
3765     switch (act) {
3766     case TokenNameERROR:
3767       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3768     // //$NON-NLS-1$
3769     case TokenNameINLINE_HTML:
3770       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3771     case TokenNameIdentifier:
3772       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3773     case TokenNameVariable:
3774       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3775     case TokenNameabstract:
3776       return "abstract"; //$NON-NLS-1$
3777     case TokenNameand:
3778       return "AND"; //$NON-NLS-1$
3779     case TokenNamearray:
3780       return "array"; //$NON-NLS-1$
3781     case TokenNameas:
3782       return "as"; //$NON-NLS-1$
3783     case TokenNamebreak:
3784       return "break"; //$NON-NLS-1$
3785     case TokenNamecase:
3786       return "case"; //$NON-NLS-1$
3787     case TokenNameclass:
3788       return "class"; //$NON-NLS-1$
3789     case TokenNamecatch:
3790       return "catch"; //$NON-NLS-1$
3791     case TokenNameclone:
3792       //$NON-NLS-1$
3793       return "clone";
3794     case TokenNameconst:
3795       //$NON-NLS-1$
3796       return "const";
3797     case TokenNamecontinue:
3798       return "continue"; //$NON-NLS-1$
3799     case TokenNamedefault:
3800       return "default"; //$NON-NLS-1$
3801     //      case TokenNamedefine :
3802     //        return "define"; //$NON-NLS-1$
3803     case TokenNamedo:
3804       return "do"; //$NON-NLS-1$
3805     case TokenNameecho:
3806       return "echo"; //$NON-NLS-1$
3807     case TokenNameelse:
3808       return "else"; //$NON-NLS-1$
3809     case TokenNameelseif:
3810       return "elseif"; //$NON-NLS-1$
3811     case TokenNameendfor:
3812       return "endfor"; //$NON-NLS-1$
3813     case TokenNameendforeach:
3814       return "endforeach"; //$NON-NLS-1$
3815     case TokenNameendif:
3816       return "endif"; //$NON-NLS-1$
3817     case TokenNameendswitch:
3818       return "endswitch"; //$NON-NLS-1$
3819     case TokenNameendwhile:
3820       return "endwhile"; //$NON-NLS-1$
3821     case TokenNameexit:
3822       return "exit";
3823     case TokenNameextends:
3824       return "extends"; //$NON-NLS-1$
3825     //      case TokenNamefalse :
3826     //        return "false"; //$NON-NLS-1$
3827     case TokenNamefinal:
3828       return "final"; //$NON-NLS-1$
3829     case TokenNamefor:
3830       return "for"; //$NON-NLS-1$
3831     case TokenNameforeach:
3832       return "foreach"; //$NON-NLS-1$
3833     case TokenNamefunction:
3834       return "function"; //$NON-NLS-1$
3835     case TokenNameglobal:
3836       return "global"; //$NON-NLS-1$
3837     case TokenNameif:
3838       return "if"; //$NON-NLS-1$
3839     case TokenNameimplements:
3840       return "implements"; //$NON-NLS-1$
3841     case TokenNameinclude:
3842       return "include"; //$NON-NLS-1$
3843     case TokenNameinclude_once:
3844       return "include_once"; //$NON-NLS-1$
3845     case TokenNameinstanceof:
3846       return "instanceof"; //$NON-NLS-1$
3847     case TokenNameinterface:
3848       return "interface"; //$NON-NLS-1$
3849     case TokenNameisset:
3850       return "isset"; //$NON-NLS-1$
3851     case TokenNamelist:
3852       return "list"; //$NON-NLS-1$
3853     case TokenNamenew:
3854       return "new"; //$NON-NLS-1$
3855     //      case TokenNamenull :
3856     //        return "null"; //$NON-NLS-1$
3857     case TokenNameor:
3858       return "OR"; //$NON-NLS-1$
3859     case TokenNameprint:
3860       return "print"; //$NON-NLS-1$
3861     case TokenNameprivate:
3862       return "private"; //$NON-NLS-1$
3863     case TokenNameprotected:
3864       return "protected"; //$NON-NLS-1$
3865     case TokenNamepublic:
3866       return "public"; //$NON-NLS-1$
3867     case TokenNamerequire:
3868       return "require"; //$NON-NLS-1$
3869     case TokenNamerequire_once:
3870       return "require_once"; //$NON-NLS-1$
3871     case TokenNamereturn:
3872       return "return"; //$NON-NLS-1$
3873     case TokenNamestatic:
3874       return "static"; //$NON-NLS-1$
3875     case TokenNameswitch:
3876       return "switch"; //$NON-NLS-1$
3877     //      case TokenNametrue :
3878     //        return "true"; //$NON-NLS-1$
3879     case TokenNameunset:
3880       return "unset"; //$NON-NLS-1$
3881     case TokenNamevar:
3882       return "var"; //$NON-NLS-1$
3883     case TokenNamewhile:
3884       return "while"; //$NON-NLS-1$
3885     case TokenNamexor:
3886       return "XOR"; //$NON-NLS-1$
3887     //      case TokenNamethis :
3888     //        return "$this"; //$NON-NLS-1$
3889     case TokenNameIntegerLiteral:
3890       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3891     case TokenNameDoubleLiteral:
3892       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3893     case TokenNameStringDoubleQuote:
3894       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3895     case TokenNameStringSingleQuote:
3896       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3897     case TokenNameStringInterpolated:
3898       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3899     case TokenNameEncapsedString0:
3900       return "`"; //$NON-NLS-1$
3901     case TokenNameEncapsedString1:
3902       return "\'"; //$NON-NLS-1$
3903     case TokenNameEncapsedString2:
3904       return "\""; //$NON-NLS-1$
3905     case TokenNameSTRING:
3906       return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3907     case TokenNameHEREDOC:
3908       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3909     case TokenNamePLUS_PLUS:
3910       return "++"; //$NON-NLS-1$
3911     case TokenNameMINUS_MINUS:
3912       return "--"; //$NON-NLS-1$
3913     case TokenNameEQUAL_EQUAL:
3914       return "=="; //$NON-NLS-1$
3915     case TokenNameEQUAL_EQUAL_EQUAL:
3916       return "==="; //$NON-NLS-1$
3917     case TokenNameEQUAL_GREATER:
3918       return "=>"; //$NON-NLS-1$
3919     case TokenNameLESS_EQUAL:
3920       return "<="; //$NON-NLS-1$
3921     case TokenNameGREATER_EQUAL:
3922       return ">="; //$NON-NLS-1$
3923     case TokenNameNOT_EQUAL:
3924       return "!="; //$NON-NLS-1$
3925     case TokenNameNOT_EQUAL_EQUAL:
3926       return "!=="; //$NON-NLS-1$
3927     case TokenNameLEFT_SHIFT:
3928       return "<<"; //$NON-NLS-1$
3929     case TokenNameRIGHT_SHIFT:
3930       return ">>"; //$NON-NLS-1$
3931     case TokenNamePLUS_EQUAL:
3932       return "+="; //$NON-NLS-1$
3933     case TokenNameMINUS_EQUAL:
3934       return "-="; //$NON-NLS-1$
3935     case TokenNameMULTIPLY_EQUAL:
3936       return "*="; //$NON-NLS-1$
3937     case TokenNameDIVIDE_EQUAL:
3938       return "/="; //$NON-NLS-1$
3939     case TokenNameAND_EQUAL:
3940       return "&="; //$NON-NLS-1$
3941     case TokenNameOR_EQUAL:
3942       return "|="; //$NON-NLS-1$
3943     case TokenNameXOR_EQUAL:
3944       return "^="; //$NON-NLS-1$
3945     case TokenNameREMAINDER_EQUAL:
3946       return "%="; //$NON-NLS-1$
3947     case TokenNameDOT_EQUAL:
3948       return ".="; //$NON-NLS-1$
3949     case TokenNameLEFT_SHIFT_EQUAL:
3950       return "<<="; //$NON-NLS-1$
3951     case TokenNameRIGHT_SHIFT_EQUAL:
3952       return ">>="; //$NON-NLS-1$
3953     case TokenNameOR_OR:
3954       return "||"; //$NON-NLS-1$
3955     case TokenNameAND_AND:
3956       return "&&"; //$NON-NLS-1$
3957     case TokenNamePLUS:
3958       return "+"; //$NON-NLS-1$
3959     case TokenNameMINUS:
3960       return "-"; //$NON-NLS-1$
3961     case TokenNameMINUS_GREATER:
3962       return "->";
3963     case TokenNameNOT:
3964       return "!"; //$NON-NLS-1$
3965     case TokenNameREMAINDER:
3966       return "%"; //$NON-NLS-1$
3967     case TokenNameXOR:
3968       return "^"; //$NON-NLS-1$
3969     case TokenNameAND:
3970       return "&"; //$NON-NLS-1$
3971     case TokenNameMULTIPLY:
3972       return "*"; //$NON-NLS-1$
3973     case TokenNameOR:
3974       return "|"; //$NON-NLS-1$
3975     case TokenNameTWIDDLE:
3976       return "~"; //$NON-NLS-1$
3977     case TokenNameTWIDDLE_EQUAL:
3978       return "~="; //$NON-NLS-1$
3979     case TokenNameDIVIDE:
3980       return "/"; //$NON-NLS-1$
3981     case TokenNameGREATER:
3982       return ">"; //$NON-NLS-1$
3983     case TokenNameLESS:
3984       return "<"; //$NON-NLS-1$
3985     case TokenNameLPAREN:
3986       return "("; //$NON-NLS-1$
3987     case TokenNameRPAREN:
3988       return ")"; //$NON-NLS-1$
3989     case TokenNameLBRACE:
3990       return "{"; //$NON-NLS-1$
3991     case TokenNameRBRACE:
3992       return "}"; //$NON-NLS-1$
3993     case TokenNameLBRACKET:
3994       return "["; //$NON-NLS-1$
3995     case TokenNameRBRACKET:
3996       return "]"; //$NON-NLS-1$
3997     case TokenNameSEMICOLON:
3998       return ";"; //$NON-NLS-1$
3999     case TokenNameQUESTION:
4000       return "?"; //$NON-NLS-1$
4001     case TokenNameCOLON:
4002       return ":"; //$NON-NLS-1$
4003     case TokenNameCOMMA:
4004       return ","; //$NON-NLS-1$
4005     case TokenNameDOT:
4006       return "."; //$NON-NLS-1$
4007     case TokenNameEQUAL:
4008       return "="; //$NON-NLS-1$
4009     case TokenNameAT:
4010       return "@";
4011     case TokenNameDOLLAR:
4012       return "$";
4013     case TokenNameDOLLAR_LBRACE:
4014       return "${";
4015     case TokenNameLBRACE_DOLLAR:
4016       return "{$";
4017     case TokenNameEOF:
4018       return "EOF"; //$NON-NLS-1$
4019     case TokenNameWHITESPACE:
4020       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4021     case TokenNameCOMMENT_LINE:
4022       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4023     case TokenNameCOMMENT_BLOCK:
4024       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4025     case TokenNameCOMMENT_PHPDOC:
4026       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4027     //      case TokenNameHTML :
4028     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
4029     // //$NON-NLS-1$
4030     case TokenNameFILE:
4031       return "__FILE__"; //$NON-NLS-1$
4032     case TokenNameLINE:
4033       return "__LINE__"; //$NON-NLS-1$
4034     case TokenNameCLASS_C:
4035       return "__CLASS__"; //$NON-NLS-1$
4036     case TokenNameMETHOD_C:
4037       return "__METHOD__"; //$NON-NLS-1$
4038     case TokenNameFUNC_C:
4039       return "__FUNCTION__"; //$NON-NLS-1
4040     case TokenNameboolCAST:
4041       return "( bool )"; //$NON-NLS-1$
4042     case TokenNameintCAST:
4043       return "( int )"; //$NON-NLS-1$
4044     case TokenNamedoubleCAST:
4045       return "( double )"; //$NON-NLS-1$
4046     case TokenNameobjectCAST:
4047       return "( object )"; //$NON-NLS-1$
4048     case TokenNamestringCAST:
4049       return "( string )"; //$NON-NLS-1$
4050     default:
4051       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4052     }
4053   }
4054
4055   public Scanner() {
4056     this(false, false);
4057   }
4058
4059   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4060     this(tokenizeComments, tokenizeWhiteSpace, false);
4061   }
4062
4063   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4064     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
4065   }
4066
4067   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4068       boolean assertMode) {
4069     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
4070   }
4071
4072   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4073       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4074     this.eofPosition = Integer.MAX_VALUE;
4075     this.tokenizeComments = tokenizeComments;
4076     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4077     this.tokenizeStrings = tokenizeStrings;
4078     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4079     this.assertMode = assertMode;
4080     this.encapsedStringStack = null;
4081     this.taskTags = taskTags;
4082     this.taskPriorities = taskPriorities;
4083   }
4084
4085   private void checkNonExternalizeString() throws InvalidInputException {
4086     if (currentLine == null)
4087       return;
4088     parseTags(currentLine);
4089   }
4090
4091   private void parseTags(NLSLine line) throws InvalidInputException {
4092     String s = new String(getCurrentTokenSource());
4093     int pos = s.indexOf(TAG_PREFIX);
4094     int lineLength = line.size();
4095     while (pos != -1) {
4096       int start = pos + TAG_PREFIX_LENGTH;
4097       int end = s.indexOf(TAG_POSTFIX, start);
4098       String index = s.substring(start, end);
4099       int i = 0;
4100       try {
4101         i = Integer.parseInt(index) - 1;
4102         // Tags are one based not zero based.
4103       } catch (NumberFormatException e) {
4104         i = -1; // we don't want to consider this as a valid NLS tag
4105       }
4106       if (line.exists(i)) {
4107         line.set(i, null);
4108       }
4109       pos = s.indexOf(TAG_PREFIX, start);
4110     }
4111     this.nonNLSStrings = new StringLiteral[lineLength];
4112     int nonNLSCounter = 0;
4113     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4114       StringLiteral literal = (StringLiteral) iterator.next();
4115       if (literal != null) {
4116         this.nonNLSStrings[nonNLSCounter++] = literal;
4117       }
4118     }
4119     if (nonNLSCounter == 0) {
4120       this.nonNLSStrings = null;
4121       currentLine = null;
4122       return;
4123     }
4124     this.wasNonExternalizedStringLiteral = true;
4125     if (nonNLSCounter != lineLength) {
4126       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4127     }
4128     currentLine = null;
4129   }
4130
4131   public final void scanEscapeCharacter() throws InvalidInputException {
4132     // the string with "\\u" is a legal string of two chars \ and u
4133     //thus we use a direct access to the source (for regular cases).
4134     if (unicodeAsBackSlash) {
4135       // consume next character
4136       unicodeAsBackSlash = false;
4137       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
4138       // (source[currentPosition] == 'u')) {
4139       //                                getNextUnicodeChar();
4140       //                        } else {
4141       if (withoutUnicodePtr != 0) {
4142         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4143         //                              }
4144       }
4145     } else
4146       currentCharacter = source[currentPosition++];
4147     switch (currentCharacter) {
4148     case 'b':
4149       currentCharacter = '\b';
4150       break;
4151     case 't':
4152       currentCharacter = '\t';
4153       break;
4154     case 'n':
4155       currentCharacter = '\n';
4156       break;
4157     case 'f':
4158       currentCharacter = '\f';
4159       break;
4160     case 'r':
4161       currentCharacter = '\r';
4162       break;
4163     case '\"':
4164       currentCharacter = '\"';
4165       break;
4166     case '\'':
4167       currentCharacter = '\'';
4168       break;
4169     case '\\':
4170       currentCharacter = '\\';
4171       break;
4172     default:
4173       // -----------octal escape--------------
4174       // OctalDigit
4175       // OctalDigit OctalDigit
4176       // ZeroToThree OctalDigit OctalDigit
4177       int number = Character.getNumericValue(currentCharacter);
4178       if (number >= 0 && number <= 7) {
4179         boolean zeroToThreeNot = number > 3;
4180         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4181           int digit = Character.getNumericValue(currentCharacter);
4182           if (digit >= 0 && digit <= 7) {
4183             number = (number * 8) + digit;
4184             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4185               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4186                 // Digit --> ignore last character
4187                 currentPosition--;
4188               } else {
4189                 digit = Character.getNumericValue(currentCharacter);
4190                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4191                   // OctalDigit OctalDigit
4192                   number = (number * 8) + digit;
4193                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4194                   // --> ignore last character
4195                   currentPosition--;
4196                 }
4197               }
4198             } else { // has read \OctalDigit NonDigit--> ignore last
4199               // character
4200               currentPosition--;
4201             }
4202           } else { // has read \OctalDigit NonOctalDigit--> ignore last
4203             // character
4204             currentPosition--;
4205           }
4206         } else { // has read \OctalDigit --> ignore last character
4207           currentPosition--;
4208         }
4209         if (number > 255)
4210           throw new InvalidInputException(INVALID_ESCAPE);
4211         currentCharacter = (char) number;
4212       } else
4213         throw new InvalidInputException(INVALID_ESCAPE);
4214     }
4215   }
4216
4217   //chech presence of task: tags
4218   //TODO (frederic) see if we need to take unicode characters into account...
4219   public void checkTaskTag(int commentStart, int commentEnd) {
4220     char[] src = this.source;
4221
4222     // only look for newer task: tags
4223     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4224       return;
4225     }
4226     int foundTaskIndex = this.foundTaskCount;
4227     char previous = src[commentStart + 1]; // should be '*' or '/'
4228     nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4229       char[] tag = null;
4230       char[] priority = null;
4231       // check for tag occurrence only if not ambiguous with javadoc tag
4232       if (previous != '@') {
4233         nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4234           tag = this.taskTags[itag];
4235           int tagLength = tag.length;
4236           if (tagLength == 0)
4237             continue nextTag;
4238
4239           // ensure tag is not leaded with letter if tag starts with a letter
4240           if (Character.isJavaIdentifierStart(tag[0])) {
4241             if (Character.isJavaIdentifierPart(previous)) {
4242               continue nextTag;
4243             }
4244           }
4245
4246           for (int t = 0; t < tagLength; t++) {
4247             char sc, tc;
4248             int x = i + t;
4249             if (x >= this.eofPosition || x >= commentEnd)
4250               continue nextTag;
4251             if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4252               if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4253                 continue nextTag;
4254               }
4255             }
4256           }
4257           // ensure tag is not followed with letter if tag finishes with a letter
4258           if (i + tagLength < commentEnd && Character.isJavaIdentifierPart(src[i + tagLength - 1])) {
4259             if (Character.isJavaIdentifierPart(src[i + tagLength]))
4260               continue nextTag;
4261           }
4262           if (this.foundTaskTags == null) {
4263             this.foundTaskTags = new char[5][];
4264             this.foundTaskMessages = new char[5][];
4265             this.foundTaskPriorities = new char[5][];
4266             this.foundTaskPositions = new int[5][];
4267           } else if (this.foundTaskCount == this.foundTaskTags.length) {
4268             System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4269                 this.foundTaskCount);
4270             System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4271                 this.foundTaskCount);
4272             System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4273                 this.foundTaskCount);
4274             System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4275                 this.foundTaskCount);
4276           }
4277
4278           priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4279
4280           this.foundTaskTags[this.foundTaskCount] = tag;
4281           this.foundTaskPriorities[this.foundTaskCount] = priority;
4282           this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4283           this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4284           this.foundTaskCount++;
4285           i += tagLength - 1; // will be incremented when looping
4286           break nextTag;
4287         }
4288       }
4289       previous = src[i];
4290     }
4291     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4292       // retrieve message start and end positions
4293       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4294       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4295       // at most beginning of next task
4296       if (max_value < msgStart) {
4297         max_value = msgStart; // would only occur if tag is before EOF.
4298       }
4299       int end = -1;
4300       char c;
4301       for (int j = msgStart; j < max_value; j++) {
4302         if ((c = src[j]) == '\n' || c == '\r') {
4303           end = j - 1;
4304           break;
4305         }
4306       }
4307       if (end == -1) {
4308         for (int j = max_value; j > msgStart; j--) {
4309           if ((c = src[j]) == '*') {
4310             end = j - 1;
4311             break;
4312           }
4313         }
4314         if (end == -1)
4315           end = max_value;
4316       }
4317       if (msgStart == end)
4318         continue; // empty
4319       // trim the message
4320       while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4321         end--;
4322       while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4323         msgStart++;
4324       // update the end position of the task
4325       this.foundTaskPositions[i][1] = end;
4326       // get the message source
4327       final int messageLength = end - msgStart + 1;
4328       char[] message = new char[messageLength];
4329       System.arraycopy(src, msgStart, message, 0, messageLength);
4330       this.foundTaskMessages[i] = message;
4331     }
4332   }
4333
4334   // check presence of task: tags
4335   //  public void checkTaskTag(int commentStart, int commentEnd) {
4336   //    // only look for newer task: tags
4337   //    if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4338   //      return;
4339   //    }
4340   //    int foundTaskIndex = this.foundTaskCount;
4341   //    nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4342   //      char[] tag = null;
4343   //      char[] priority = null;
4344   //      // check for tag occurrence
4345   //      nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4346   //        tag = this.taskTags[itag];
4347   //        priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4348   //        int tagLength = tag.length;
4349   //        for (int t = 0; t < tagLength; t++) {
4350   //          if (this.source[i + t] != tag[t])
4351   //            continue nextTag;
4352   //        }
4353   //        if (this.foundTaskTags == null) {
4354   //          this.foundTaskTags = new char[5][];
4355   //          this.foundTaskMessages = new char[5][];
4356   //          this.foundTaskPriorities = new char[5][];
4357   //          this.foundTaskPositions = new int[5][];
4358   //        } else if (this.foundTaskCount == this.foundTaskTags.length) {
4359   //          System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4360   //          System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4361   //              this.foundTaskCount);
4362   //          System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4363   //              this.foundTaskCount);
4364   //          System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4365   //              this.foundTaskCount);
4366   //        }
4367   //        this.foundTaskTags[this.foundTaskCount] = tag;
4368   //        this.foundTaskPriorities[this.foundTaskCount] = priority;
4369   //        this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4370   //        this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4371   //        this.foundTaskCount++;
4372   //        i += tagLength - 1; // will be incremented when looping
4373   //      }
4374   //    }
4375   //    for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4376   //      // retrieve message start and end positions
4377   //      int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4378   //      int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4379   //      // at most beginning of next task
4380   //      if (max_value < msgStart)
4381   //        max_value = msgStart; // would only occur if tag is before EOF.
4382   //      int end = -1;
4383   //      char c;
4384   //      for (int j = msgStart; j < max_value; j++) {
4385   //        if ((c = this.source[j]) == '\n' || c == '\r') {
4386   //          end = j - 1;
4387   //          break;
4388   //        }
4389   //      }
4390   //      if (end == -1) {
4391   //        for (int j = max_value; j > msgStart; j--) {
4392   //          if ((c = this.source[j]) == '*') {
4393   //            end = j - 1;
4394   //            break;
4395   //          }
4396   //        }
4397   //        if (end == -1)
4398   //          end = max_value;
4399   //      }
4400   //      if (msgStart == end)
4401   //        continue; // empty
4402   //      // trim the message
4403   //      while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4404   //        end--;
4405   //      while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4406   //        msgStart++;
4407   //      // update the end position of the task
4408   //      this.foundTaskPositions[i][1] = end;
4409   //      // get the message source
4410   //      final int messageLength = end - msgStart + 1;
4411   //      char[] message = new char[messageLength];
4412   //      System.arraycopy(source, msgStart, message, 0, messageLength);
4413   //      this.foundTaskMessages[i] = message;
4414   //    }
4415   //  }
4416 }