net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * APIs are:
   * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed
   *   into the correct char);
   * - sourceStart gives the position into the stream;
   * - currentPosition-1 gives the sourceEnd position into the stream.
   */
  28   // 1.4 feature
  29   private boolean assertMode;
  30
  31   public boolean useAssertAsAnIndentifier = false;
  32
  33   //flag indicating if processed source contains occurrences of keyword assert
  34   public boolean containsAssertKeyword = false;
  35
  36   public boolean recordLineSeparator;
  37
  38   public boolean ignorePHPOneLiner = false;
  39
  40   public boolean phpMode = false;
  41
  42   public Stack encapsedStringStack = null;
  43
  44   public char currentCharacter;
  45
  46   public int startPosition;
  47
  48   public int currentPosition;
  49
  50   public int initialPosition, eofPosition;
  51
  52   // after this position eof are generated instead of real token from the
  53   // source
  54   public boolean tokenizeComments;
  55
  56   public boolean tokenizeWhiteSpace;
  57
  58   public boolean tokenizeStrings;
  59
  60   //source should be viewed as a window (aka a part)
  61   //of a entire very large stream
  62   public char source[];
  63
  64   //unicode support
  65   public char[] withoutUnicodeBuffer;
  66
  67   public int withoutUnicodePtr;
  68
  69   //when == 0 ==> no unicode in the current token
  70   public boolean unicodeAsBackSlash = false;
  71
  72   public boolean scanningFloatLiteral = false;
  73
  74   //support for /** comments
  75   public int[] commentStops = new int[10];
  76
  77   public int[] commentStarts = new int[10];
  78
  79   public int commentPtr = -1; // no comment test with commentPtr value -1
  80
  81   protected int lastCommentLinePosition = -1;
  82
  83   //diet parsing support - jump over some method body when requested
  84   public boolean diet = false;
  85
  86   //support for the poor-line-debuggers ....
  87   //remember the position of the cr/lf
  88   public int[] lineEnds = new int[250];
  89
  90   public int linePtr = -1;
  91
  92   public boolean wasAcr = false;
  93
  94   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  95
  96   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  97
  98   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
  99
 100   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 101
 102   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 103
 104   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 105
 106   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 107
 108   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 109
 110   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 111
 112   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 113
 114   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 115
 116   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 117
  //----------------optimized identifier management------------------
 119   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 120       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 121       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 122       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 123       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 124       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 125       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 126       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 127       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 128
 129   static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] { '$', 'c' },
 130       charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$', 'f' },
 131       charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' }, charArray_vi = new char[] { '$', 'i' },
 132       charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' }, charArray_vl = new char[] { '$', 'l' },
 133       charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' }, charArray_vo = new char[] { '$', 'o' },
 134       charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' }, charArray_vr = new char[] { '$', 'r' },
 135       charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' }, charArray_vu = new char[] { '$', 'u' },
 136       charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' }, charArray_vx = new char[] { '$', 'x' },
 137       charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
 138
 139   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 140
 141   static final int TableSize = 30, InternalTableSize = 6;
 142
 143   //30*6 = 180 entries
 144   public static final int OptimizedLength = 6;
 145
 146   public/* static */
 147   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 148
 149   // support for detecting non-externalized string literals
 150   int currentLineNr = -1;
 151
 152   int previousLineNr = -1;
 153
 154   NLSLine currentLine = null;
 155
 156   List lines = new ArrayList();
 157
 158   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 159
 160   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 161
 162   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 163
 164   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 165
 166   public StringLiteral[] nonNLSStrings = null;
 167
 168   public boolean checkNonExternalizedStringLiterals = true;
 169
 170   public boolean wasNonExternalizedStringLiteral = false;
 171   /* static */{
 172     for (int i = 0; i < 6; i++) {
 173       for (int j = 0; j < TableSize; j++) {
 174         for (int k = 0; k < InternalTableSize; k++) {
 175           charArray_length[i][j][k] = initCharArray;
 176         }
 177       }
 178     }
 179   }
 180
 181   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 182
 183   public static final int RoundBracket = 0;
 184
 185   public static final int SquareBracket = 1;
 186
 187   public static final int CurlyBracket = 2;
 188
 189   public static final int BracketKinds = 3;
 190
 191   // task tag support
 192   public char[][] foundTaskTags = null;
 193
 194   public char[][] foundTaskMessages;
 195
 196   public char[][] foundTaskPriorities = null;
 197
 198   public int[][] foundTaskPositions;
 199
 200   public int foundTaskCount = 0;
 201
 202   public char[][] taskTags = null;
 203
 204   public char[][] taskPriorities = null;
 205
 206   public boolean isTaskCaseSensitive = true;
 207
 208   public static final boolean DEBUG = false;
 209
 210   public static final boolean TRACE = false;
 211
 212   public ICompilationUnit compilationUnit = null;
 213
 214   /**
 215    * Determines if the specified character is permissible as the first character in a PHP identifier.
 216    *
   * The '$' character for PHP variables isn't regarded as the first character !
 218    */
 219   public static boolean isPHPIdentifierStart(char ch) {
 220     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 221   }
 222
 223   /**
 224    * Determines if the specified character may be part of a PHP identifier as other than the first character
 225    */
 226   public static boolean isPHPIdentifierPart(char ch) {
 227     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 228   }
 229
 230   public final boolean atEnd() {
 231     // This code is not relevant if source is
 232     // Only a part of the real stream input
 233     return source.length == currentPosition;
 234   }
 235
 236   public char[] getCurrentIdentifierSource() {
 237     //return the token REAL source (aka unicodes are precomputed)
 238     char[] result;
 239     //    if (withoutUnicodePtr != 0)
 240     //      //0 is used as a fast test flag so the real first char is in position 1
 241     //      System.arraycopy(
 242     //        withoutUnicodeBuffer,
 243     //        1,
 244     //        result = new char[withoutUnicodePtr],
 245     //        0,
 246     //        withoutUnicodePtr);
 247     //    else {
 248     int length = currentPosition - startPosition;
 249     switch (length) { // see OptimizedLength
 250     case 1:
 251       return optimizedCurrentTokenSource1();
 252     case 2:
 253       return optimizedCurrentTokenSource2();
 254     case 3:
 255       return optimizedCurrentTokenSource3();
 256     case 4:
 257       return optimizedCurrentTokenSource4();
 258     case 5:
 259       return optimizedCurrentTokenSource5();
 260     case 6:
 261       return optimizedCurrentTokenSource6();
 262     }
 263     //no optimization
 264     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 265     //   }
 266     return result;
 267   }
 268
 269   public int getCurrentTokenEndPosition() {
 270     return this.currentPosition - 1;
 271   }
 272
 273   public final char[] getCurrentTokenSource() {
 274     // Return the token REAL source (aka unicodes are precomputed)
 275     char[] result;
 276     //    if (withoutUnicodePtr != 0)
 277     //      // 0 is used as a fast test flag so the real first char is in position 1
 278     //      System.arraycopy(
 279     //        withoutUnicodeBuffer,
 280     //        1,
 281     //        result = new char[withoutUnicodePtr],
 282     //        0,
 283     //        withoutUnicodePtr);
 284     //    else {
 285     int length;
 286     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 287     //    }
 288     return result;
 289   }
 290
 291   public final char[] getCurrentTokenSource(int startPos) {
 292     // Return the token REAL source (aka unicodes are precomputed)
 293     char[] result;
 294     //    if (withoutUnicodePtr != 0)
 295     //      // 0 is used as a fast test flag so the real first char is in position 1
 296     //      System.arraycopy(
 297     //        withoutUnicodeBuffer,
 298     //        1,
 299     //        result = new char[withoutUnicodePtr],
 300     //        0,
 301     //        withoutUnicodePtr);
 302     //    else {
 303     int length;
 304     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 305     //  }
 306     return result;
 307   }
 308
 309   public final char[] getCurrentTokenSourceString() {
 310     //return the token REAL source (aka unicodes are precomputed).
 311     //REMOVE the two " that are at the beginning and the end.
 312     char[] result;
 313     if (withoutUnicodePtr != 0)
 314       //0 is used as a fast test flag so the real first char is in position 1
 315       System.arraycopy(withoutUnicodeBuffer, 2,
 316       //2 is 1 (real start) + 1 (to jump over the ")
 317           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 318     else {
 319       int length;
 320       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 321     }
 322     return result;
 323   }
 324
 325   public int getCurrentTokenStartPosition() {
 326     return this.startPosition;
 327   }
 328
 329   public final char[] getCurrentStringLiteralSource() {
 330     // Return the token REAL source (aka unicodes are precomputed)
 331     if (startPosition + 1 >= currentPosition) {
 332       return new char[0];
 333     }
 334     char[] result;
 335     int length;
 336     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 337     //    }
 338     return result;
 339   }
 340
 341   public final char[] getCurrentStringLiteralSource(int startPos) {
 342     // Return the token REAL source (aka unicodes are precomputed)
 343     char[] result;
 344     int length;
 345     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 346     //    }
 347     return result;
 348   }
 349
 350   /*
 351    * Search the source position corresponding to the end of a given line number
 352    *
 353    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 354    *
 355    * In case the given line number is inconsistent, answers -1.
 356    */
 357   public final int getLineEnd(int lineNumber) {
 358     if (lineEnds == null)
 359       return -1;
 360     if (lineNumber >= lineEnds.length)
 361       return -1;
 362     if (lineNumber <= 0)
 363       return -1;
 364     if (lineNumber == lineEnds.length - 1)
 365       return eofPosition;
 366     return lineEnds[lineNumber - 1];
 367     // next line start one character behind the lineEnd of the previous line
 368   }
 369
 370   /**
 371    * Search the source position corresponding to the beginning of a given line number
 372    *
 373    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 374    *
 375    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 376    *
 377    * In case the given line number is inconsistent, answers -1.
 378    */
 379   public final int getLineStart(int lineNumber) {
 380     if (lineEnds == null)
 381       return -1;
 382     if (lineNumber >= lineEnds.length)
 383       return -1;
 384     if (lineNumber <= 0)
 385       return -1;
 386     if (lineNumber == 1)
 387       return initialPosition;
 388     return lineEnds[lineNumber - 2] + 1;
 389     // next line start one character behind the lineEnd of the previous line
 390   }
 391
 392   public final boolean getNextChar(char testedChar) {
 393     //BOOLEAN
 394     //handle the case of unicode.
 395     //when a unicode appears then we must use a buffer that holds char
 396     // internal values
 397     //At the end of this method currentCharacter holds the new visited char
 398     //and currentPosition points right next after it
 399     //Both previous lines are true if the currentCharacter is == to the
 400     // testedChar
 401     //On false, no side effect has occured.
 402     //ALL getNextChar.... ARE OPTIMIZED COPIES
 403     int temp = currentPosition;
 404     try {
 405       currentCharacter = source[currentPosition++];
 406       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 407       //        && (source[currentPosition] == 'u')) {
 408       //        //-------------unicode traitement ------------
 409       //        int c1, c2, c3, c4;
 410       //        int unicodeSize = 6;
 411       //        currentPosition++;
 412       //        while (source[currentPosition] == 'u') {
 413       //          currentPosition++;
 414       //          unicodeSize++;
 415       //        }
 416       //
 417       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 418       //          || c1 < 0)
 419       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 420       //            || c2 < 0)
 421       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 422       //            || c3 < 0)
 423       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 424       //            || c4 < 0)) {
 425       //          currentPosition = temp;
 426       //          return false;
 427       //        }
 428       //
 429       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 430       //        if (currentCharacter != testedChar) {
 431       //          currentPosition = temp;
 432       //          return false;
 433       //        }
 434       //        unicodeAsBackSlash = currentCharacter == '\\';
 435       //
 436       //        //need the unicode buffer
 437       //        if (withoutUnicodePtr == 0) {
 438       //          //buffer all the entries that have been left aside....
 439       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 440       //          System.arraycopy(
 441       //            source,
 442       //            startPosition,
 443       //            withoutUnicodeBuffer,
 444       //            1,
 445       //            withoutUnicodePtr);
 446       //        }
 447       //        //fill the buffer with the char
 448       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 449       //        return true;
 450       //
 451       //      } //-------------end unicode traitement--------------
 452       //      else {
 453       if (currentCharacter != testedChar) {
 454         currentPosition = temp;
 455         return false;
 456       }
 457       unicodeAsBackSlash = false;
 458       //        if (withoutUnicodePtr != 0)
 459       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 460       return true;
 461       //      }
 462     } catch (IndexOutOfBoundsException e) {
 463       unicodeAsBackSlash = false;
 464       currentPosition = temp;
 465       return false;
 466     }
 467   }
 468
 469   public final int getNextChar(char testedChar1, char testedChar2) {
 470     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 471     //test can be done with (x==0) for the first and (x>0) for the second
 472     //handle the case of unicode.
 473     //when a unicode appears then we must use a buffer that holds char
 474     // internal values
 475     //At the end of this method currentCharacter holds the new visited char
 476     //and currentPosition points right next after it
 477     //Both previous lines are true if the currentCharacter is == to the
 478     // testedChar1/2
 479     //On false, no side effect has occured.
 480     //ALL getNextChar.... ARE OPTIMIZED COPIES
 481     int temp = currentPosition;
 482     try {
 483       int result;
 484       currentCharacter = source[currentPosition++];
 485       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 486       //        && (source[currentPosition] == 'u')) {
 487       //        //-------------unicode traitement ------------
 488       //        int c1, c2, c3, c4;
 489       //        int unicodeSize = 6;
 490       //        currentPosition++;
 491       //        while (source[currentPosition] == 'u') {
 492       //          currentPosition++;
 493       //          unicodeSize++;
 494       //        }
 495       //
 496       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 497       //          || c1 < 0)
 498       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 499       //            || c2 < 0)
 500       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 501       //            || c3 < 0)
 502       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 503       //            || c4 < 0)) {
 504       //          currentPosition = temp;
 505       //          return 2;
 506       //        }
 507       //
 508       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 509       //        if (currentCharacter == testedChar1)
 510       //          result = 0;
 511       //        else if (currentCharacter == testedChar2)
 512       //          result = 1;
 513       //        else {
 514       //          currentPosition = temp;
 515       //          return -1;
 516       //        }
 517       //
 518       //        //need the unicode buffer
 519       //        if (withoutUnicodePtr == 0) {
 520       //          //buffer all the entries that have been left aside....
 521       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 522       //          System.arraycopy(
 523       //            source,
 524       //            startPosition,
 525       //            withoutUnicodeBuffer,
 526       //            1,
 527       //            withoutUnicodePtr);
 528       //        }
 529       //        //fill the buffer with the char
 530       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 531       //        return result;
 532       //      } //-------------end unicode traitement--------------
 533       //      else {
 534       if (currentCharacter == testedChar1)
 535         result = 0;
 536       else if (currentCharacter == testedChar2)
 537         result = 1;
 538       else {
 539         currentPosition = temp;
 540         return -1;
 541       }
 542       //        if (withoutUnicodePtr != 0)
 543       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 544       return result;
 545       //     }
 546     } catch (IndexOutOfBoundsException e) {
 547       currentPosition = temp;
 548       return -1;
 549     }
 550   }
 551
 552   public final boolean getNextCharAsDigit() {
 553     //BOOLEAN
 554     //handle the case of unicode.
 555     //when a unicode appears then we must use a buffer that holds char
 556     // internal values
 557     //At the end of this method currentCharacter holds the new visited char
 558     //and currentPosition points right next after it
 559     //Both previous lines are true if the currentCharacter is a digit
 560     //On false, no side effect has occured.
 561     //ALL getNextChar.... ARE OPTIMIZED COPIES
 562     int temp = currentPosition;
 563     try {
 564       currentCharacter = source[currentPosition++];
 565       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 566       //        && (source[currentPosition] == 'u')) {
 567       //        //-------------unicode traitement ------------
 568       //        int c1, c2, c3, c4;
 569       //        int unicodeSize = 6;
 570       //        currentPosition++;
 571       //        while (source[currentPosition] == 'u') {
 572       //          currentPosition++;
 573       //          unicodeSize++;
 574       //        }
 575       //
 576       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 577       //          || c1 < 0)
 578       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 579       //            || c2 < 0)
 580       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 581       //            || c3 < 0)
 582       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 583       //            || c4 < 0)) {
 584       //          currentPosition = temp;
 585       //          return false;
 586       //        }
 587       //
 588       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 589       //        if (!Character.isDigit(currentCharacter)) {
 590       //          currentPosition = temp;
 591       //          return false;
 592       //        }
 593       //
 594       //        //need the unicode buffer
 595       //        if (withoutUnicodePtr == 0) {
 596       //          //buffer all the entries that have been left aside....
 597       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 598       //          System.arraycopy(
 599       //            source,
 600       //            startPosition,
 601       //            withoutUnicodeBuffer,
 602       //            1,
 603       //            withoutUnicodePtr);
 604       //        }
 605       //        //fill the buffer with the char
 606       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 607       //        return true;
 608       //      } //-------------end unicode traitement--------------
 609       //      else {
 610       if (!Character.isDigit(currentCharacter)) {
 611         currentPosition = temp;
 612         return false;
 613       }
 614       //        if (withoutUnicodePtr != 0)
 615       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 616       return true;
 617       //      }
 618     } catch (IndexOutOfBoundsException e) {
 619       currentPosition = temp;
 620       return false;
 621     }
 622   }
 623
 624   public final boolean getNextCharAsDigit(int radix) {
 625     //BOOLEAN
 626     //handle the case of unicode.
 627     //when a unicode appears then we must use a buffer that holds char
 628     // internal values
 629     //At the end of this method currentCharacter holds the new visited char
 630     //and currentPosition points right next after it
 631     //Both previous lines are true if the currentCharacter is a digit base on
 632     // radix
 633     //On false, no side effect has occured.
 634     //ALL getNextChar.... ARE OPTIMIZED COPIES
 635     int temp = currentPosition;
 636     try {
 637       currentCharacter = source[currentPosition++];
 638       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 639       //        && (source[currentPosition] == 'u')) {
 640       //        //-------------unicode traitement ------------
 641       //        int c1, c2, c3, c4;
 642       //        int unicodeSize = 6;
 643       //        currentPosition++;
 644       //        while (source[currentPosition] == 'u') {
 645       //          currentPosition++;
 646       //          unicodeSize++;
 647       //        }
 648       //
 649       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 650       //          || c1 < 0)
 651       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 652       //            || c2 < 0)
 653       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 654       //            || c3 < 0)
 655       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 656       //            || c4 < 0)) {
 657       //          currentPosition = temp;
 658       //          return false;
 659       //        }
 660       //
 661       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 662       //        if (Character.digit(currentCharacter, radix) == -1) {
 663       //          currentPosition = temp;
 664       //          return false;
 665       //        }
 666       //
 667       //        //need the unicode buffer
 668       //        if (withoutUnicodePtr == 0) {
 669       //          //buffer all the entries that have been left aside....
 670       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 671       //          System.arraycopy(
 672       //            source,
 673       //            startPosition,
 674       //            withoutUnicodeBuffer,
 675       //            1,
 676       //            withoutUnicodePtr);
 677       //        }
 678       //        //fill the buffer with the char
 679       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 680       //        return true;
 681       //      } //-------------end unicode traitement--------------
 682       //      else {
 683       if (Character.digit(currentCharacter, radix) == -1) {
 684         currentPosition = temp;
 685         return false;
 686       }
 687       //        if (withoutUnicodePtr != 0)
 688       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 689       return true;
 690       //      }
 691     } catch (IndexOutOfBoundsException e) {
 692       currentPosition = temp;
 693       return false;
 694     }
 695   }
 696
 697   public boolean getNextCharAsJavaIdentifierPart() {
 698     //BOOLEAN
 699     //handle the case of unicode.
 700     //when a unicode appears then we must use a buffer that holds char
 701     // internal values
 702     //At the end of this method currentCharacter holds the new visited char
 703     //and currentPosition points right next after it
 704     //Both previous lines are true if the currentCharacter is a
 705     // JavaIdentifierPart
 706     //On false, no side effect has occured.
 707     //ALL getNextChar.... ARE OPTIMIZED COPIES
 708     int temp = currentPosition;
 709     try {
 710       currentCharacter = source[currentPosition++];
 711       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 712       //        && (source[currentPosition] == 'u')) {
 713       //        //-------------unicode traitement ------------
 714       //        int c1, c2, c3, c4;
 715       //        int unicodeSize = 6;
 716       //        currentPosition++;
 717       //        while (source[currentPosition] == 'u') {
 718       //          currentPosition++;
 719       //          unicodeSize++;
 720       //        }
 721       //
 722       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 723       //          || c1 < 0)
 724       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 725       //            || c2 < 0)
 726       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 727       //            || c3 < 0)
 728       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 729       //            || c4 < 0)) {
 730       //          currentPosition = temp;
 731       //          return false;
 732       //        }
 733       //
 734       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 735       //        if (!isPHPIdentifierPart(currentCharacter)) {
 736       //          currentPosition = temp;
 737       //          return false;
 738       //        }
 739       //
 740       //        //need the unicode buffer
 741       //        if (withoutUnicodePtr == 0) {
 742       //          //buffer all the entries that have been left aside....
 743       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 744       //          System.arraycopy(
 745       //            source,
 746       //            startPosition,
 747       //            withoutUnicodeBuffer,
 748       //            1,
 749       //            withoutUnicodePtr);
 750       //        }
 751       //        //fill the buffer with the char
 752       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 753       //        return true;
 754       //      } //-------------end unicode traitement--------------
 755       //      else {
 756       if (!isPHPIdentifierPart(currentCharacter)) {
 757         currentPosition = temp;
 758         return false;
 759       }
 760       //        if (withoutUnicodePtr != 0)
 761       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 762       return true;
 763       //      }
 764     } catch (IndexOutOfBoundsException e) {
 765       currentPosition = temp;
 766       return false;
 767     }
 768   }
 769
 770   public int getCastOrParen() {
 771     int tempPosition = currentPosition;
 772     char tempCharacter = currentCharacter;
 773     int tempToken = TokenNameLPAREN;
 774     boolean found = false;
 775     StringBuffer buf = new StringBuffer();
 776     try {
 777       do {
 778         currentCharacter = source[currentPosition++];
 779       } while (currentCharacter == ' ' || currentCharacter == '\t');
 780       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 781         buf.append(currentCharacter);
 782         currentCharacter = source[currentPosition++];
 783       }
 784       if (buf.length() >= 3 && buf.length() <= 7) {
 785         char[] data = buf.toString().toCharArray();
 786         int index = 0;
 787         switch (data.length) {
 788         case 3:
 789           // int
 790           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 791             found = true;
 792             tempToken = TokenNameintCAST;
 793           }
 794           break;
 795         case 4:
 796           // bool real
 797           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 798             found = true;
 799             tempToken = TokenNameboolCAST;
 800           } else {
 801             index = 0;
 802             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 803               found = true;
 804               tempToken = TokenNamedoubleCAST;
 805             }
 806           }
 807           break;
 808         case 5:
 809           // array unset float
 810           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 811               && (data[++index] == 'y')) {
 812             found = true;
 813             tempToken = TokenNamearrayCAST;
 814           } else {
 815             index = 0;
 816             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 817                 && (data[++index] == 't')) {
 818               found = true;
 819               tempToken = TokenNameunsetCAST;
 820             } else {
 821               index = 0;
 822               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 823                   && (data[++index] == 't')) {
 824                 found = true;
 825                 tempToken = TokenNamedoubleCAST;
 826               }
 827             }
 828           }
 829           break;
 830         case 6:
 831           // object string double
 832           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 833               && (data[++index] == 'c') && (data[++index] == 't')) {
 834             found = true;
 835             tempToken = TokenNameobjectCAST;
 836           } else {
 837             index = 0;
 838             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 839                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 840               found = true;
 841               tempToken = TokenNamestringCAST;
 842             } else {
 843               index = 0;
 844               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 845                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 846                 found = true;
 847                 tempToken = TokenNamedoubleCAST;
 848               }
 849             }
 850           }
 851           break;
 852         case 7:
 853           // boolean integer
 854           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 855               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 856             found = true;
 857             tempToken = TokenNameboolCAST;
 858           } else {
 859             index = 0;
 860             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 861                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 862               found = true;
 863               tempToken = TokenNameintCAST;
 864             }
 865           }
 866           break;
 867         }
 868         if (found) {
 869           while (currentCharacter == ' ' || currentCharacter == '\t') {
 870             currentCharacter = source[currentPosition++];
 871           }
 872           if (currentCharacter == ')') {
 873             return tempToken;
 874           }
 875         }
 876       }
 877     } catch (IndexOutOfBoundsException e) {
 878     }
 879     currentCharacter = tempCharacter;
 880     currentPosition = tempPosition;
 881     return TokenNameLPAREN;
 882   }
 883
 884   public void consumeStringInterpolated() throws InvalidInputException {
 885     try {
 886       // consume next character
 887       unicodeAsBackSlash = false;
 888       currentCharacter = source[currentPosition++];
 889       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 890       //                  && (source[currentPosition] == 'u')) {
 891       //                  getNextUnicodeChar();
 892       //                } else {
 893       //                  if (withoutUnicodePtr != 0) {
 894       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 895       //                      currentCharacter;
 896       //                  }
 897       //                }
 898       while (currentCharacter != '`') {
 899         /** ** in PHP \r and \n are valid in string literals *** */
 900         //                if ((currentCharacter == '\n')
 901         //                  || (currentCharacter == '\r')) {
 902         //                  // relocate if finding another quote fairly close: thus unicode
 903         // '/u000D' will be fully consumed
 904         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 905         //                    if (currentPosition + lookAhead == source.length)
 906         //                      break;
 907         //                    if (source[currentPosition + lookAhead] == '\n')
 908         //                      break;
 909         //                    if (source[currentPosition + lookAhead] == '\"') {
 910         //                      currentPosition += lookAhead + 1;
 911         //                      break;
 912         //                    }
 913         //                  }
 914         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 915         //                }
 916         if (currentCharacter == '\\') {
 917           int escapeSize = currentPosition;
 918           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 919           //scanEscapeCharacter make a side effect on this value and we need
 920           // the previous value few lines down this one
 921           scanDoubleQuotedEscapeCharacter();
 922           escapeSize = currentPosition - escapeSize;
 923           if (withoutUnicodePtr == 0) {
 924             //buffer all the entries that have been left aside....
 925             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 926             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 927             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 928           } else { //overwrite the / in the buffer
 929             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 930             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 931               // where only one is correct
 932               withoutUnicodePtr--;
 933             }
 934           }
 935         }
 936         // consume next character
 937         unicodeAsBackSlash = false;
 938         currentCharacter = source[currentPosition++];
 939         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 940         //                    && (source[currentPosition] == 'u')) {
 941         //                    getNextUnicodeChar();
 942         //                  } else {
 943         if (withoutUnicodePtr != 0) {
 944           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 945         }
 946         //                  }
 947       }
 948     } catch (IndexOutOfBoundsException e) {
 949       //    reset end position for error reporting
 950       currentPosition -= 2;
 951       throw new InvalidInputException(UNTERMINATED_STRING);
 952     } catch (InvalidInputException e) {
 953       if (e.getMessage().equals(INVALID_ESCAPE)) {
 954         // relocate if finding another quote fairly close: thus unicode
 955         // '/u000D' will be fully consumed
 956         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 957           if (currentPosition + lookAhead == source.length)
 958             break;
 959           if (source[currentPosition + lookAhead] == '\n')
 960             break;
 961           if (source[currentPosition + lookAhead] == '`') {
 962             currentPosition += lookAhead + 1;
 963             break;
 964           }
 965         }
 966       }
 967       throw e; // rethrow
 968     }
 969     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 970       // //$NON-NLS-?$ where ? is an
 971       // int.
 972       if (currentLine == null) {
 973         currentLine = new NLSLine();
 974         lines.add(currentLine);
 975       }
 976       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
 977     }
 978   }
 979
 980   public void consumeStringConstant() throws InvalidInputException {
 981     try {
 982       // consume next character
 983       unicodeAsBackSlash = false;
 984       currentCharacter = source[currentPosition++];
 985       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 986       //                  && (source[currentPosition] == 'u')) {
 987       //                  getNextUnicodeChar();
 988       //                } else {
 989       //                  if (withoutUnicodePtr != 0) {
 990       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 991       //                      currentCharacter;
 992       //                  }
 993       //                }
 994       while (currentCharacter != '\'') {
 995         /** ** in PHP \r and \n are valid in string literals *** */
 996         //                  if ((currentCharacter == '\n')
 997         //                    || (currentCharacter == '\r')) {
 998         //                    // relocate if finding another quote fairly close: thus unicode
 999         // '/u000D' will be fully consumed
1000         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1001         //                      if (currentPosition + lookAhead == source.length)
1002         //                        break;
1003         //                      if (source[currentPosition + lookAhead] == '\n')
1004         //                        break;
1005         //                      if (source[currentPosition + lookAhead] == '\"') {
1006         //                        currentPosition += lookAhead + 1;
1007         //                        break;
1008         //                      }
1009         //                    }
1010         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1011         //                  }
1012         if (currentCharacter == '\\') {
1013           int escapeSize = currentPosition;
1014           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1015           //scanEscapeCharacter make a side effect on this value and we need
1016           // the previous value few lines down this one
1017           scanSingleQuotedEscapeCharacter();
1018           escapeSize = currentPosition - escapeSize;
1019           if (withoutUnicodePtr == 0) {
1020             //buffer all the entries that have been left aside....
1021             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1022             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1023             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1024           } else { //overwrite the / in the buffer
1025             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1026             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1027               // where only one is correct
1028               withoutUnicodePtr--;
1029             }
1030           }
1031         }
1032         // consume next character
1033         unicodeAsBackSlash = false;
1034         currentCharacter = source[currentPosition++];
1035         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1036         //                    && (source[currentPosition] == 'u')) {
1037         //                    getNextUnicodeChar();
1038         //                  } else {
1039         if (withoutUnicodePtr != 0) {
1040           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1041         }
1042         //                  }
1043       }
1044     } catch (IndexOutOfBoundsException e) {
1045       // reset end position for error reporting
1046       currentPosition -= 2;
1047       throw new InvalidInputException(UNTERMINATED_STRING);
1048     } catch (InvalidInputException e) {
1049       if (e.getMessage().equals(INVALID_ESCAPE)) {
1050         // relocate if finding another quote fairly close: thus unicode
1051         // '/u000D' will be fully consumed
1052         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1053           if (currentPosition + lookAhead == source.length)
1054             break;
1055           if (source[currentPosition + lookAhead] == '\n')
1056             break;
1057           if (source[currentPosition + lookAhead] == '\'') {
1058             currentPosition += lookAhead + 1;
1059             break;
1060           }
1061         }
1062       }
1063       throw e; // rethrow
1064     }
1065     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1066       // //$NON-NLS-?$ where ? is an
1067       // int.
1068       if (currentLine == null) {
1069         currentLine = new NLSLine();
1070         lines.add(currentLine);
1071       }
1072       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1073     }
1074   }
1075
1076   public void consumeStringLiteral() throws InvalidInputException {
1077     try {
1078       // consume next character
1079       unicodeAsBackSlash = false;
1080       currentCharacter = source[currentPosition++];
1081       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1082       //                  && (source[currentPosition] == 'u')) {
1083       //                  getNextUnicodeChar();
1084       //                } else {
1085       //                  if (withoutUnicodePtr != 0) {
1086       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1087       //                      currentCharacter;
1088       //                  }
1089       //                }
1090       while (currentCharacter != '"') {
1091         /** ** in PHP \r and \n are valid in string literals *** */
1092         //                  if ((currentCharacter == '\n')
1093         //                    || (currentCharacter == '\r')) {
1094         //                    // relocate if finding another quote fairly close: thus unicode
1095         // '/u000D' will be fully consumed
1096         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1097         //                      if (currentPosition + lookAhead == source.length)
1098         //                        break;
1099         //                      if (source[currentPosition + lookAhead] == '\n')
1100         //                        break;
1101         //                      if (source[currentPosition + lookAhead] == '\"') {
1102         //                        currentPosition += lookAhead + 1;
1103         //                        break;
1104         //                      }
1105         //                    }
1106         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1107         //                  }
1108         if (currentCharacter == '\\') {
1109           int escapeSize = currentPosition;
1110           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1111           //scanEscapeCharacter make a side effect on this value and we need
1112           // the previous value few lines down this one
1113           scanDoubleQuotedEscapeCharacter();
1114           escapeSize = currentPosition - escapeSize;
1115           if (withoutUnicodePtr == 0) {
1116             //buffer all the entries that have been left aside....
1117             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1118             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1119             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1120           } else { //overwrite the / in the buffer
1121             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1122             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1123               // where only one is correct
1124               withoutUnicodePtr--;
1125             }
1126           }
1127         }
1128         // consume next character
1129         unicodeAsBackSlash = false;
1130         currentCharacter = source[currentPosition++];
1131         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1132         //                    && (source[currentPosition] == 'u')) {
1133         //                    getNextUnicodeChar();
1134         //                  } else {
1135         if (withoutUnicodePtr != 0) {
1136           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1137         }
1138         //                  }
1139       }
1140     } catch (IndexOutOfBoundsException e) {
1141       //    reset end position for error reporting
1142       currentPosition -= 2;
1143       throw new InvalidInputException(UNTERMINATED_STRING);
1144     } catch (InvalidInputException e) {
1145       if (e.getMessage().equals(INVALID_ESCAPE)) {
1146         // relocate if finding another quote fairly close: thus unicode
1147         // '/u000D' will be fully consumed
1148         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1149           if (currentPosition + lookAhead == source.length)
1150             break;
1151           if (source[currentPosition + lookAhead] == '\n')
1152             break;
1153           if (source[currentPosition + lookAhead] == '\"') {
1154             currentPosition += lookAhead + 1;
1155             break;
1156           }
1157         }
1158       }
1159       throw e; // rethrow
1160     }
1161     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1162       // //$NON-NLS-?$ where ? is an
1163       // int.
1164       if (currentLine == null) {
1165         currentLine = new NLSLine();
1166         lines.add(currentLine);
1167       }
1168       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1169     }
1170   }
1171
1172   public int getNextToken() throws InvalidInputException {
1173     if (!phpMode) {
1174       return getInlinedHTML(currentPosition);
1175     }
1176     if (phpMode) {
1177       this.wasAcr = false;
1178       if (diet) {
1179         jumpOverMethodBody();
1180         diet = false;
1181         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1182       }
1183       try {
1184         while (true) {
1185           withoutUnicodePtr = 0;
1186           //start with a new token
1187           char encapsedChar = ' ';
1188           if (!encapsedStringStack.isEmpty()) {
1189             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1190           }
1191           if (encapsedChar != '$' && encapsedChar != ' ') {
1192             currentCharacter = source[currentPosition++];
1193             if (currentCharacter == encapsedChar) {
1194               switch (currentCharacter) {
1195               case '`':
1196                 return TokenNameEncapsedString0;
1197               case '\'':
1198                 return TokenNameEncapsedString1;
1199               case '"':
1200                 return TokenNameEncapsedString2;
1201               }
1202             }
1203             while (currentCharacter != encapsedChar) {
1204               /** ** in PHP \r and \n are valid in string literals *** */
1205               switch (currentCharacter) {
1206               case '\\':
1207                 int escapeSize = currentPosition;
1208                 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1209                 //scanEscapeCharacter make a side effect on this value and
1210                 // we need the previous value few lines down this one
1211                 scanDoubleQuotedEscapeCharacter();
1212                 escapeSize = currentPosition - escapeSize;
1213                 if (withoutUnicodePtr == 0) {
1214                   //buffer all the entries that have been left aside....
1215                   withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1216                   System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1217                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1218                 } else { //overwrite the / in the buffer
1219                   withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1220                   if (backSlashAsUnicodeInString) { //there are TWO \ in
1221                     withoutUnicodePtr--;
1222                   }
1223                 }
1224                 break;
1225               case '$':
1226                 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1227                   currentPosition--;
1228                   encapsedStringStack.push(new Character('$'));
1229                   return TokenNameSTRING;
1230                 }
1231                 break;
1232               case '{':
1233                 if (source[currentPosition] == '$') { // CURLY_OPEN
1234                   currentPosition--;
1235                   encapsedStringStack.push(new Character('$'));
1236                   return TokenNameSTRING;
1237                 }
1238               }
1239               // consume next character
1240               unicodeAsBackSlash = false;
1241               currentCharacter = source[currentPosition++];
1242               if (withoutUnicodePtr != 0) {
1243                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1244               }
1245               //                  }
1246             } // end while
1247             currentPosition--;
1248             return TokenNameSTRING;
1249           }
1250           // ---------Consume white space and handles startPosition---------
1251           int whiteStart = currentPosition;
1252           startPosition = currentPosition;
1253           currentCharacter = source[currentPosition++];
1254           if (encapsedChar == '$') {
1255             switch (currentCharacter) {
1256             case '\\':
1257               currentCharacter = source[currentPosition++];
1258               return TokenNameSTRING;
1259             case '{':
1260               if (encapsedChar == '$') {
1261                 if (getNextChar('$'))
1262                   return TokenNameLBRACE_DOLLAR;
1263               }
1264               return TokenNameLBRACE;
1265             case '}':
1266               return TokenNameRBRACE;
1267             case '[':
1268               return TokenNameLBRACKET;
1269             case ']':
1270               return TokenNameRBRACKET;
1271             case '\'':
1272               if (tokenizeStrings) {
1273                 consumeStringConstant();
1274                 return TokenNameStringSingleQuote;
1275               }
1276               return TokenNameEncapsedString1;
1277             case '"':
1278               return TokenNameEncapsedString2;
1279             case '`':
1280               if (tokenizeStrings) {
1281                 consumeStringInterpolated();
1282                 return TokenNameStringInterpolated;
1283               }
1284               return TokenNameEncapsedString0;
1285             case '-':
1286               if (getNextChar('>'))
1287                 return TokenNameMINUS_GREATER;
1288               return TokenNameSTRING;
1289             default:
1290               if (currentCharacter == '$') {
1291                 int oldPosition = currentPosition;
1292                 try {
1293                   currentCharacter = source[currentPosition++];
1294                   if (currentCharacter == '{') {
1295                     return TokenNameDOLLAR_LBRACE;
1296                   }
1297                   if (isPHPIdentifierStart(currentCharacter)) {
1298                     return scanIdentifierOrKeyword(true);
1299                   } else {
1300                     currentPosition = oldPosition;
1301                     return TokenNameSTRING;
1302                   }
1303                 } catch (IndexOutOfBoundsException e) {
1304                   currentPosition = oldPosition;
1305                   return TokenNameSTRING;
1306                 }
1307               }
1308               if (isPHPIdentifierStart(currentCharacter))
1309                 return scanIdentifierOrKeyword(false);
1310               if (Character.isDigit(currentCharacter))
1311                 return scanNumber(false);
1312               return TokenNameERROR;
1313             }
1314           }
1315           //          boolean isWhiteSpace;
1316
1317           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1318             startPosition = currentPosition;
1319             currentCharacter = source[currentPosition++];
1320             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1321             //              && (source[currentPosition] == 'u')) {
1322             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1323             //            } else {
1324             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1325               checkNonExternalizeString();
1326               if (recordLineSeparator) {
1327                 pushLineSeparator();
1328               } else {
1329                 currentLine = null;
1330               }
1331             }
1332             //            isWhiteSpace = (currentCharacter == ' ')
1333             //                || Character.isWhitespace(currentCharacter);
1334             //            }
1335           }
1336           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1337             // reposition scanner in case we are interested by spaces as tokens
1338             currentPosition--;
1339             startPosition = whiteStart;
1340             return TokenNameWHITESPACE;
1341           }
1342           //little trick to get out in the middle of a source compuation
1343           if (currentPosition > eofPosition)
1344             return TokenNameEOF;
1345           // ---------Identify the next token-------------
1346           switch (currentCharacter) {
1347           case '(':
1348             return getCastOrParen();
1349           case ')':
1350             return TokenNameRPAREN;
1351           case '{':
1352             return TokenNameLBRACE;
1353           case '}':
1354             return TokenNameRBRACE;
1355           case '[':
1356             return TokenNameLBRACKET;
1357           case ']':
1358             return TokenNameRBRACKET;
1359           case ';':
1360             return TokenNameSEMICOLON;
1361           case ',':
1362             return TokenNameCOMMA;
1363           case '.':
1364             if (getNextChar('='))
1365               return TokenNameDOT_EQUAL;
1366             if (getNextCharAsDigit())
1367               return scanNumber(true);
1368             return TokenNameDOT;
1369           case '+': {
1370             int test;
1371             if ((test = getNextChar('+', '=')) == 0)
1372               return TokenNamePLUS_PLUS;
1373             if (test > 0)
1374               return TokenNamePLUS_EQUAL;
1375             return TokenNamePLUS;
1376           }
1377           case '-': {
1378             int test;
1379             if ((test = getNextChar('-', '=')) == 0)
1380               return TokenNameMINUS_MINUS;
1381             if (test > 0)
1382               return TokenNameMINUS_EQUAL;
1383             if (getNextChar('>'))
1384               return TokenNameMINUS_GREATER;
1385             return TokenNameMINUS;
1386           }
1387           case '~':
1388             if (getNextChar('='))
1389               return TokenNameTWIDDLE_EQUAL;
1390             return TokenNameTWIDDLE;
1391           case '!':
1392             if (getNextChar('=')) {
1393               if (getNextChar('=')) {
1394                 return TokenNameNOT_EQUAL_EQUAL;
1395               }
1396               return TokenNameNOT_EQUAL;
1397             }
1398             return TokenNameNOT;
1399           case '*':
1400             if (getNextChar('='))
1401               return TokenNameMULTIPLY_EQUAL;
1402             return TokenNameMULTIPLY;
1403           case '%':
1404             if (getNextChar('='))
1405               return TokenNameREMAINDER_EQUAL;
1406             return TokenNameREMAINDER;
1407           case '<': {
1408             int oldPosition = currentPosition;
1409             try {
1410               currentCharacter = source[currentPosition++];
1411             } catch (IndexOutOfBoundsException e) {
1412               currentPosition = oldPosition;
1413               return TokenNameLESS;
1414             }
1415             switch (currentCharacter) {
1416             case '=':
1417               return TokenNameLESS_EQUAL;
1418             case '>':
1419               return TokenNameNOT_EQUAL;
1420             case '<':
1421               if (getNextChar('='))
1422                 return TokenNameLEFT_SHIFT_EQUAL;
1423               if (getNextChar('<')) {
1424                 currentCharacter = source[currentPosition++];
1425                 while (Character.isWhitespace(currentCharacter)) {
1426                   currentCharacter = source[currentPosition++];
1427                 }
1428                 int heredocStart = currentPosition - 1;
1429                 int heredocLength = 0;
1430                 if (isPHPIdentifierStart(currentCharacter)) {
1431                   currentCharacter = source[currentPosition++];
1432                 } else {
1433                   return TokenNameERROR;
1434                 }
1435                 while (isPHPIdentifierPart(currentCharacter)) {
1436                   currentCharacter = source[currentPosition++];
1437                 }
1438                 heredocLength = currentPosition - heredocStart - 1;
1439                 // heredoc end-tag determination
1440                 boolean endTag = true;
1441                 char ch;
1442                 do {
1443                   ch = source[currentPosition++];
1444                   if (ch == '\r' || ch == '\n') {
1445                     if (recordLineSeparator) {
1446                       pushLineSeparator();
1447                     } else {
1448                       currentLine = null;
1449                     }
1450                     for (int i = 0; i < heredocLength; i++) {
1451                       if (source[currentPosition + i] != source[heredocStart + i]) {
1452                         endTag = false;
1453                         break;
1454                       }
1455                     }
1456                     if (endTag) {
1457                       currentPosition += heredocLength - 1;
1458                       currentCharacter = source[currentPosition++];
1459                       break; // do...while loop
1460                     } else {
1461                       endTag = true;
1462                     }
1463                   }
1464                 } while (true);
1465                 return TokenNameHEREDOC;
1466               }
1467               return TokenNameLEFT_SHIFT;
1468             }
1469             currentPosition = oldPosition;
1470             return TokenNameLESS;
1471           }
1472           case '>': {
1473             int test;
1474             if ((test = getNextChar('=', '>')) == 0)
1475               return TokenNameGREATER_EQUAL;
1476             if (test > 0) {
1477               if ((test = getNextChar('=', '>')) == 0)
1478                 return TokenNameRIGHT_SHIFT_EQUAL;
1479               return TokenNameRIGHT_SHIFT;
1480             }
1481             return TokenNameGREATER;
1482           }
1483           case '=':
1484             if (getNextChar('=')) {
1485               if (getNextChar('=')) {
1486                 return TokenNameEQUAL_EQUAL_EQUAL;
1487               }
1488               return TokenNameEQUAL_EQUAL;
1489             }
1490             if (getNextChar('>'))
1491               return TokenNameEQUAL_GREATER;
1492             return TokenNameEQUAL;
1493           case '&': {
1494             int test;
1495             if ((test = getNextChar('&', '=')) == 0)
1496               return TokenNameAND_AND;
1497             if (test > 0)
1498               return TokenNameAND_EQUAL;
1499             return TokenNameAND;
1500           }
1501           case '|': {
1502             int test;
1503             if ((test = getNextChar('|', '=')) == 0)
1504               return TokenNameOR_OR;
1505             if (test > 0)
1506               return TokenNameOR_EQUAL;
1507             return TokenNameOR;
1508           }
1509           case '^':
1510             if (getNextChar('='))
1511               return TokenNameXOR_EQUAL;
1512             return TokenNameXOR;
1513           case '?':
1514             if (getNextChar('>')) {
1515               phpMode = false;
1516               if (currentPosition == source.length) {
1517                 phpMode = true;
1518                 return TokenNameINLINE_HTML;
1519               }
1520               return getInlinedHTML(currentPosition - 2);
1521             }
1522             return TokenNameQUESTION;
1523           case ':':
1524             if (getNextChar(':'))
1525               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1526             return TokenNameCOLON;
1527           case '@':
1528             return TokenNameAT;
1529           case '\'':
1530             consumeStringConstant();
1531             return TokenNameStringSingleQuote;
1532           case '"':
1533             if (tokenizeStrings) {
1534               consumeStringLiteral();
1535               return TokenNameStringDoubleQuote;
1536             }
1537             return TokenNameEncapsedString2;
1538           case '`':
1539             if (tokenizeStrings) {
1540               consumeStringInterpolated();
1541               return TokenNameStringInterpolated;
1542             }
1543             return TokenNameEncapsedString0;
1544           case '#':
1545           case '/': {
1546             char startChar = currentCharacter;
1547             if (getNextChar('=') && startChar == '/') {
1548               return TokenNameDIVIDE_EQUAL;
1549             }
1550             int test;
1551             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1552               //line comment
1553               this.lastCommentLinePosition = this.currentPosition;
1554               int endPositionForLineComment = 0;
1555               try { //get the next char
1556                 currentCharacter = source[currentPosition++];
1557                 //                    if (((currentCharacter = source[currentPosition++])
1558                 //                      == '\\')
1559                 //                      && (source[currentPosition] == 'u')) {
1560                 //                      //-------------unicode traitement ------------
1561                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1562                 //                      currentPosition++;
1563                 //                      while (source[currentPosition] == 'u') {
1564                 //                        currentPosition++;
1565                 //                      }
1566                 //                      if ((c1 =
1567                 //                        Character.getNumericValue(source[currentPosition++]))
1568                 //                        > 15
1569                 //                        || c1 < 0
1570                 //                        || (c2 =
1571                 //                          Character.getNumericValue(source[currentPosition++]))
1572                 //                          > 15
1573                 //                        || c2 < 0
1574                 //                        || (c3 =
1575                 //                          Character.getNumericValue(source[currentPosition++]))
1576                 //                          > 15
1577                 //                        || c3 < 0
1578                 //                        || (c4 =
1579                 //                          Character.getNumericValue(source[currentPosition++]))
1580                 //                          > 15
1581                 //                        || c4 < 0) {
1582                 //                        throw new
1583                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1584                 //                      } else {
1585                 //                        currentCharacter =
1586                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1587                 //                      }
1588                 //                    }
1589                 //handle the \\u case manually into comment
1590                 //                    if (currentCharacter == '\\') {
1591                 //                      if (source[currentPosition] == '\\')
1592                 //                        currentPosition++;
1593                 //                    } //jump over the \\
1594                 boolean isUnicode = false;
1595                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1596                   this.lastCommentLinePosition = this.currentPosition;
1597                   if (currentCharacter == '?') {
1598                     if (getNextChar('>')) {
1599                       startPosition = currentPosition - 2;
1600                       phpMode = false;
1601                       return TokenNameINLINE_HTML;
1602                     }
1603                   }
1604                   //get the next char
1605                   isUnicode = false;
1606                   currentCharacter = source[currentPosition++];
1607                   //                      if (((currentCharacter = source[currentPosition++])
1608                   //                        == '\\')
1609                   //                        && (source[currentPosition] == 'u')) {
1610                   //                        isUnicode = true;
1611                   //                        //-------------unicode traitement ------------
1612                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1613                   //                        currentPosition++;
1614                   //                        while (source[currentPosition] == 'u') {
1615                   //                          currentPosition++;
1616                   //                        }
1617                   //                        if ((c1 =
1618                   //                          Character.getNumericValue(source[currentPosition++]))
1619                   //                          > 15
1620                   //                          || c1 < 0
1621                   //                          || (c2 =
1622                   //                            Character.getNumericValue(
1623                   //                              source[currentPosition++]))
1624                   //                            > 15
1625                   //                          || c2 < 0
1626                   //                          || (c3 =
1627                   //                            Character.getNumericValue(
1628                   //                              source[currentPosition++]))
1629                   //                            > 15
1630                   //                          || c3 < 0
1631                   //                          || (c4 =
1632                   //                            Character.getNumericValue(
1633                   //                              source[currentPosition++]))
1634                   //                            > 15
1635                   //                          || c4 < 0) {
1636                   //                          throw new
1637                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1638                   //                        } else {
1639                   //                          currentCharacter =
1640                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1641                   //                        }
1642                   //                      }
1643                   //handle the \\u case manually into comment
1644                   //                      if (currentCharacter == '\\') {
1645                   //                        if (source[currentPosition] == '\\')
1646                   //                          currentPosition++;
1647                   //                      } //jump over the \\
1648                 }
1649                 if (isUnicode) {
1650                   endPositionForLineComment = currentPosition - 6;
1651                 } else {
1652                   endPositionForLineComment = currentPosition - 1;
1653                 }
1654                 //                    recordComment(false);
1655                 recordComment(TokenNameCOMMENT_LINE);
1656                 if (this.taskTags != null)
1657                   checkTaskTag(this.startPosition, this.currentPosition);
1658                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1659                   checkNonExternalizeString();
1660                   if (recordLineSeparator) {
1661                     if (isUnicode) {
1662                       pushUnicodeLineSeparator();
1663                     } else {
1664                       pushLineSeparator();
1665                     }
1666                   } else {
1667                     currentLine = null;
1668                   }
1669                 }
1670                 if (tokenizeComments) {
1671                   if (!isUnicode) {
1672                     currentPosition = endPositionForLineComment;
1673                     // reset one character behind
1674                   }
1675                   return TokenNameCOMMENT_LINE;
1676                 }
1677               } catch (IndexOutOfBoundsException e) { //an eof will them
1678                 // be generated
1679                 if (tokenizeComments) {
1680                   currentPosition--;
1681                   // reset one character behind
1682                   return TokenNameCOMMENT_LINE;
1683                 }
1684               }
1685               break;
1686             }
1687             if (test > 0) {
1688               //traditional and annotation comment
1689               boolean isJavadoc = false, star = false;
1690               // consume next character
1691               unicodeAsBackSlash = false;
1692               currentCharacter = source[currentPosition++];
1693               //                  if (((currentCharacter = source[currentPosition++]) ==
1694               // '\\')
1695               //                    && (source[currentPosition] == 'u')) {
1696               //                    getNextUnicodeChar();
1697               //                  } else {
1698               //                    if (withoutUnicodePtr != 0) {
1699               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1700               //                        currentCharacter;
1701               //                    }
1702               //                  }
1703               if (currentCharacter == '*') {
1704                 isJavadoc = true;
1705                 star = true;
1706               }
1707               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1708                 checkNonExternalizeString();
1709                 if (recordLineSeparator) {
1710                   pushLineSeparator();
1711                 } else {
1712                   currentLine = null;
1713                 }
1714               }
1715               try { //get the next char
1716                 currentCharacter = source[currentPosition++];
1717                 //                    if (((currentCharacter = source[currentPosition++])
1718                 //                      == '\\')
1719                 //                      && (source[currentPosition] == 'u')) {
1720                 //                      //-------------unicode traitement ------------
1721                 //                      getNextUnicodeChar();
1722                 //                    }
1723                 //handle the \\u case manually into comment
1724                 //                    if (currentCharacter == '\\') {
1725                 //                      if (source[currentPosition] == '\\')
1726                 //                        currentPosition++;
1727                 //                      //jump over the \\
1728                 //                    }
1729                 // empty comment is not a javadoc /**/
1730                 if (currentCharacter == '/') {
1731                   isJavadoc = false;
1732                 }
1733                 //loop until end of comment */
1734                 while ((currentCharacter != '/') || (!star)) {
1735                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1736                     checkNonExternalizeString();
1737                     if (recordLineSeparator) {
1738                       pushLineSeparator();
1739                     } else {
1740                       currentLine = null;
1741                     }
1742                   }
1743                   star = currentCharacter == '*';
1744                   //get next char
1745                   currentCharacter = source[currentPosition++];
1746                   //                      if (((currentCharacter = source[currentPosition++])
1747                   //                        == '\\')
1748                   //                        && (source[currentPosition] == 'u')) {
1749                   //                        //-------------unicode traitement ------------
1750                   //                        getNextUnicodeChar();
1751                   //                      }
1752                   //handle the \\u case manually into comment
1753                   //                      if (currentCharacter == '\\') {
1754                   //                        if (source[currentPosition] == '\\')
1755                   //                          currentPosition++;
1756                   //                      } //jump over the \\
1757                 }
1758                 //recordComment(isJavadoc);
1759                 if (isJavadoc) {
1760                   recordComment(TokenNameCOMMENT_PHPDOC);
1761                 } else {
1762                   recordComment(TokenNameCOMMENT_BLOCK);
1763                 }
1764
1765                 if (tokenizeComments) {
1766                   if (isJavadoc)
1767                     return TokenNameCOMMENT_PHPDOC;
1768                   return TokenNameCOMMENT_BLOCK;
1769                 }
1770
1771                 if (this.taskTags != null) {
1772                   checkTaskTag(this.startPosition, this.currentPosition);
1773                 }
1774               } catch (IndexOutOfBoundsException e) {
1775                 //                  reset end position for error reporting
1776                 currentPosition -= 2;
1777                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1778               }
1779               break;
1780             }
1781             return TokenNameDIVIDE;
1782           }
1783           case '\u001a':
1784             if (atEnd())
1785               return TokenNameEOF;
1786             //the atEnd may not be <currentPosition == source.length> if
1787             // source is only some part of a real (external) stream
1788             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1789           default:
1790             if (currentCharacter == '$') {
1791               int oldPosition = currentPosition;
1792               try {
1793                 currentCharacter = source[currentPosition++];
1794                 if (isPHPIdentifierStart(currentCharacter)) {
1795                   return scanIdentifierOrKeyword(true);
1796                 } else {
1797                   currentPosition = oldPosition;
1798                   return TokenNameDOLLAR;
1799                 }
1800               } catch (IndexOutOfBoundsException e) {
1801                 currentPosition = oldPosition;
1802                 return TokenNameDOLLAR;
1803               }
1804             }
1805             if (isPHPIdentifierStart(currentCharacter))
1806               return scanIdentifierOrKeyword(false);
1807             if (Character.isDigit(currentCharacter))
1808               return scanNumber(false);
1809             return TokenNameERROR;
1810           }
1811         }
1812       } //-----------------end switch while try--------------------
1813       catch (IndexOutOfBoundsException e) {
1814       }
1815     }
1816     return TokenNameEOF;
1817   }
1818
1819   private int getInlinedHTML(int start) throws InvalidInputException {
1820     int token = getInlinedHTMLToken(start);
1821     if (token == TokenNameINLINE_HTML) {
1822       //                Stack stack = new Stack();
1823       //                // scan html for errors
1824       //                Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1825       //                int lastPHPEndPos=0;
1826       //                for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1827       //                    Tag tag=(Tag)i.next();
1828       //
1829       //                    if (tag instanceof StartTag) {
1830       //                        StartTag startTag=(StartTag)tag;
1831       //                      // System.out.println("startTag: "+tag);
1832       //                        if (startTag.isServerTag()) {
1833       //                          // TODO : what to do with a server tag ?
1834       //                        } else {
1835       //                            // do whatever with HTML start tag
1836       //                            // use startTag.getElement() to find the element corresponding
1837       //                            // to this start tag which may be useful if you implement code
1838       //                            // folding etc
1839       //                                stack.push(startTag);
1840       //                        }
1841       //                    } else {
1842       //                        EndTag endTag=(EndTag)tag;
1843       //                        StartTag stag = (StartTag) stack.peek();
1844       //// System.out.println("endTag: "+tag);
1845       //                        // do whatever with HTML end tag.
1846       //                    }
1847       //                }
1848     }
1849     return token;
1850   }
1851
1852   /**
1853    * @return
1854    * @throws InvalidInputException
1855    */
1856   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1857     //    int htmlPosition = start;
1858     if (currentPosition > source.length) {
1859       currentPosition = source.length;
1860       return TokenNameEOF;
1861     }
1862     startPosition = start;
1863     try {
1864       while (!phpMode) {
1865         currentCharacter = source[currentPosition++];
1866         if (currentCharacter == '<') {
1867           if (getNextChar('?')) {
1868             currentCharacter = source[currentPosition++];
1869             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1870               // <?
1871               if (ignorePHPOneLiner) {
1872                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1873                   phpMode = true;
1874                   return TokenNameINLINE_HTML;
1875                 }
1876               } else {
1877                 phpMode = true;
1878                 return TokenNameINLINE_HTML;
1879               }
1880             } else {
1881               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1882               if (phpStart) {
1883                 int test = getNextChar('H', 'h');
1884                 if (test >= 0) {
1885                   test = getNextChar('P', 'p');
1886                   if (test >= 0) {
1887                     // <?PHP <?php
1888                     if (ignorePHPOneLiner) {
1889                       if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1890                         phpMode = true;
1891                         return TokenNameINLINE_HTML;
1892                       }
1893                     } else {
1894                       phpMode = true;
1895                       return TokenNameINLINE_HTML;
1896                     }
1897                   }
1898                 }
1899               }
1900             }
1901           }
1902         }
1903         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1904           if (recordLineSeparator) {
1905             pushLineSeparator();
1906           } else {
1907             currentLine = null;
1908           }
1909         }
1910       } //-----------------while--------------------
1911       phpMode = true;
1912       return TokenNameINLINE_HTML;
1913     } //-----------------try--------------------
1914     catch (IndexOutOfBoundsException e) {
1915       startPosition = start;
1916       currentPosition--;
1917     }
1918     phpMode = true;
1919     return TokenNameINLINE_HTML;
1920   }
1921
1922   /**
1923    * @return
1924    */
1925   private int lookAheadLinePHPTag() {
1926     // check if the PHP is only in this line (for CodeFormatter)
1927     int currentPositionInLine = currentPosition;
1928     char previousCharInLine = ' ';
1929     char currentCharInLine = ' ';
1930     boolean singleQuotedStringActive = false;
1931     boolean doubleQuotedStringActive = false;
1932
1933     try {
1934       // look ahead in this line
1935       while (true) {
1936         previousCharInLine = currentCharInLine;
1937         currentCharInLine = source[currentPositionInLine++];
1938         switch (currentCharInLine) {
1939         case '>':
1940           if (previousCharInLine == '?') {
1941             // update the scanner's current Position in the source
1942             currentPosition = currentPositionInLine;
1943             // use as "dummy" token
1944             return TokenNameEOF;
1945           }
1946           break;
1947         case '\"':
1948           if (doubleQuotedStringActive) {
1949             if (previousCharInLine != '\\') {
1950               doubleQuotedStringActive = false;
1951             }
1952           } else {
1953             if (!singleQuotedStringActive) {
1954               doubleQuotedStringActive = true;
1955             }
1956           }
1957           break;
1958         case '\'':
1959           if (singleQuotedStringActive) {
1960             if (previousCharInLine != '\\') {
1961               singleQuotedStringActive = false;
1962             }
1963           } else {
1964             if (!doubleQuotedStringActive) {
1965               singleQuotedStringActive = true;
1966             }
1967           }
1968           break;
1969         case '\n':
1970           phpMode = true;
1971           return TokenNameINLINE_HTML;
1972         case '#':
1973           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1974             phpMode = true;
1975             return TokenNameINLINE_HTML;
1976           }
1977           break;
1978         case '/':
1979           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1980             phpMode = true;
1981             return TokenNameINLINE_HTML;
1982           }
1983           break;
1984         case '*':
1985           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1986             phpMode = true;
1987             return TokenNameINLINE_HTML;
1988           }
1989           break;
1990         }
1991       }
1992     } catch (IndexOutOfBoundsException e) {
1993       phpMode = true;
1994       currentPosition = currentPositionInLine;
1995       return TokenNameINLINE_HTML;
1996     }
1997   }
1998
1999   //  public final void getNextUnicodeChar()
2000   //    throws IndexOutOfBoundsException, InvalidInputException {
2001   //    //VOID
2002   //    //handle the case of unicode.
2003   //    //when a unicode appears then we must use a buffer that holds char
2004   // internal values
2005   //    //At the end of this method currentCharacter holds the new visited char
2006   //    //and currentPosition points right next after it
2007   //
2008   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
2009   //
2010   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2011   //    currentPosition++;
2012   //    while (source[currentPosition] == 'u') {
2013   //      currentPosition++;
2014   //      unicodeSize++;
2015   //    }
2016   //
2017   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2018   //      || c1 < 0
2019   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2020   //      || c2 < 0
2021   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2022   //      || c3 < 0
2023   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2024   //      || c4 < 0) {
2025   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2026   //    } else {
2027   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2028   //      //need the unicode buffer
2029   //      if (withoutUnicodePtr == 0) {
2030   //        //buffer all the entries that have been left aside....
2031   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2032   //        System.arraycopy(
2033   //          source,
2034   //          startPosition,
2035   //          withoutUnicodeBuffer,
2036   //          1,
2037   //          withoutUnicodePtr);
2038   //      }
2039   //      //fill the buffer with the char
2040   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2041   //    }
2042   //    unicodeAsBackSlash = currentCharacter == '\\';
2043   //  }
2044   /*
2045    * Tokenize a method body, assuming that curly brackets are properly balanced.
2046    */
2047   public final void jumpOverMethodBody() {
2048     this.wasAcr = false;
2049     int found = 1;
2050     try {
2051       while (true) { //loop for jumping over comments
2052         // ---------Consume white space and handles startPosition---------
2053         boolean isWhiteSpace;
2054         do {
2055           startPosition = currentPosition;
2056           currentCharacter = source[currentPosition++];
2057           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2058           //            && (source[currentPosition] == 'u')) {
2059           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2060           //          } else {
2061           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2062             pushLineSeparator();
2063           isWhiteSpace = Character.isWhitespace(currentCharacter);
2064           //          }
2065         } while (isWhiteSpace);
2066         // -------consume token until } is found---------
2067         switch (currentCharacter) {
2068         case '{':
2069           found++;
2070           break;
2071         case '}':
2072           found--;
2073           if (found == 0)
2074             return;
2075           break;
2076         case '\'': {
2077           boolean test;
2078           test = getNextChar('\\');
2079           if (test) {
2080             try {
2081               scanDoubleQuotedEscapeCharacter();
2082             } catch (InvalidInputException ex) {
2083             }
2084             ;
2085           } else {
2086             //                try { // consume next character
2087             unicodeAsBackSlash = false;
2088             currentCharacter = source[currentPosition++];
2089             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2090             //                    && (source[currentPosition] == 'u')) {
2091             //                    getNextUnicodeChar();
2092             //                  } else {
2093             if (withoutUnicodePtr != 0) {
2094               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2095             }
2096             //                  }
2097             //                } catch (InvalidInputException ex) {
2098             //                };
2099           }
2100           getNextChar('\'');
2101           break;
2102         }
2103         case '"':
2104           try {
2105             //              try { // consume next character
2106             unicodeAsBackSlash = false;
2107             currentCharacter = source[currentPosition++];
2108             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2109             //                  && (source[currentPosition] == 'u')) {
2110             //                  getNextUnicodeChar();
2111             //                } else {
2112             if (withoutUnicodePtr != 0) {
2113               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2114             }
2115             //                }
2116             //              } catch (InvalidInputException ex) {
2117             //              };
2118             while (currentCharacter != '"') {
2119               if (currentCharacter == '\r') {
2120                 if (source[currentPosition] == '\n')
2121                   currentPosition++;
2122                 break;
2123                 // the string cannot go further that the line
2124               }
2125               if (currentCharacter == '\n') {
2126                 break;
2127                 // the string cannot go further that the line
2128               }
2129               if (currentCharacter == '\\') {
2130                 try {
2131                   scanDoubleQuotedEscapeCharacter();
2132                 } catch (InvalidInputException ex) {
2133                 }
2134                 ;
2135               }
2136               //                try { // consume next character
2137               unicodeAsBackSlash = false;
2138               currentCharacter = source[currentPosition++];
2139               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2140               //                    && (source[currentPosition] == 'u')) {
2141               //                    getNextUnicodeChar();
2142               //                  } else {
2143               if (withoutUnicodePtr != 0) {
2144                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2145               }
2146               //                  }
2147               //                } catch (InvalidInputException ex) {
2148               //                };
2149             }
2150           } catch (IndexOutOfBoundsException e) {
2151             return;
2152           }
2153           break;
2154         case '/': {
2155           int test;
2156           if ((test = getNextChar('/', '*')) == 0) {
2157             //line comment
2158             try {
2159               //get the next char
2160               currentCharacter = source[currentPosition++];
2161               //                  if (((currentCharacter = source[currentPosition++]) ==
2162               // '\\')
2163               //                    && (source[currentPosition] == 'u')) {
2164               //                    //-------------unicode traitement ------------
2165               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2166               //                    currentPosition++;
2167               //                    while (source[currentPosition] == 'u') {
2168               //                      currentPosition++;
2169               //                    }
2170               //                    if ((c1 =
2171               //                      Character.getNumericValue(source[currentPosition++]))
2172               //                      > 15
2173               //                      || c1 < 0
2174               //                      || (c2 =
2175               //                        Character.getNumericValue(source[currentPosition++]))
2176               //                        > 15
2177               //                      || c2 < 0
2178               //                      || (c3 =
2179               //                        Character.getNumericValue(source[currentPosition++]))
2180               //                        > 15
2181               //                      || c3 < 0
2182               //                      || (c4 =
2183               //                        Character.getNumericValue(source[currentPosition++]))
2184               //                        > 15
2185               //                      || c4 < 0) {
2186               //                      //error don't care of the value
2187               //                      currentCharacter = 'A';
2188               //                    } //something different from \n and \r
2189               //                    else {
2190               //                      currentCharacter =
2191               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2192               //                    }
2193               //                  }
2194               while (currentCharacter != '\r' && currentCharacter != '\n') {
2195                 //get the next char
2196                 currentCharacter = source[currentPosition++];
2197                 //                    if (((currentCharacter = source[currentPosition++])
2198                 //                      == '\\')
2199                 //                      && (source[currentPosition] == 'u')) {
2200                 //                      //-------------unicode traitement ------------
2201                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2202                 //                      currentPosition++;
2203                 //                      while (source[currentPosition] == 'u') {
2204                 //                        currentPosition++;
2205                 //                      }
2206                 //                      if ((c1 =
2207                 //                        Character.getNumericValue(source[currentPosition++]))
2208                 //                        > 15
2209                 //                        || c1 < 0
2210                 //                        || (c2 =
2211                 //                          Character.getNumericValue(source[currentPosition++]))
2212                 //                          > 15
2213                 //                        || c2 < 0
2214                 //                        || (c3 =
2215                 //                          Character.getNumericValue(source[currentPosition++]))
2216                 //                          > 15
2217                 //                        || c3 < 0
2218                 //                        || (c4 =
2219                 //                          Character.getNumericValue(source[currentPosition++]))
2220                 //                          > 15
2221                 //                        || c4 < 0) {
2222                 //                        //error don't care of the value
2223                 //                        currentCharacter = 'A';
2224                 //                      } //something different from \n and \r
2225                 //                      else {
2226                 //                        currentCharacter =
2227                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2228                 //                      }
2229                 //                    }
2230               }
2231               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2232                 pushLineSeparator();
2233             } catch (IndexOutOfBoundsException e) {
2234             } //an eof will them be generated
2235             break;
2236           }
2237           if (test > 0) {
2238             //traditional and annotation comment
2239             boolean star = false;
2240             //                try { // consume next character
2241             unicodeAsBackSlash = false;
2242             currentCharacter = source[currentPosition++];
2243             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2244             //                    && (source[currentPosition] == 'u')) {
2245             //                    getNextUnicodeChar();
2246             //                  } else {
2247             if (withoutUnicodePtr != 0) {
2248               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2249             }
2250             //                  };
2251             //                } catch (InvalidInputException ex) {
2252             //                };
2253             if (currentCharacter == '*') {
2254               star = true;
2255             }
2256             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2257               pushLineSeparator();
2258             try { //get the next char
2259               currentCharacter = source[currentPosition++];
2260               //                  if (((currentCharacter = source[currentPosition++]) ==
2261               // '\\')
2262               //                    && (source[currentPosition] == 'u')) {
2263               //                    //-------------unicode traitement ------------
2264               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2265               //                    currentPosition++;
2266               //                    while (source[currentPosition] == 'u') {
2267               //                      currentPosition++;
2268               //                    }
2269               //                    if ((c1 =
2270               //                      Character.getNumericValue(source[currentPosition++]))
2271               //                      > 15
2272               //                      || c1 < 0
2273               //                      || (c2 =
2274               //                        Character.getNumericValue(source[currentPosition++]))
2275               //                        > 15
2276               //                      || c2 < 0
2277               //                      || (c3 =
2278               //                        Character.getNumericValue(source[currentPosition++]))
2279               //                        > 15
2280               //                      || c3 < 0
2281               //                      || (c4 =
2282               //                        Character.getNumericValue(source[currentPosition++]))
2283               //                        > 15
2284               //                      || c4 < 0) {
2285               //                      //error don't care of the value
2286               //                      currentCharacter = 'A';
2287               //                    } //something different from * and /
2288               //                    else {
2289               //                      currentCharacter =
2290               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2291               //                    }
2292               //                  }
2293               //loop until end of comment */
2294               while ((currentCharacter != '/') || (!star)) {
2295                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2296                   pushLineSeparator();
2297                 star = currentCharacter == '*';
2298                 //get next char
2299                 currentCharacter = source[currentPosition++];
2300                 //                    if (((currentCharacter = source[currentPosition++])
2301                 //                      == '\\')
2302                 //                      && (source[currentPosition] == 'u')) {
2303                 //                      //-------------unicode traitement ------------
2304                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2305                 //                      currentPosition++;
2306                 //                      while (source[currentPosition] == 'u') {
2307                 //                        currentPosition++;
2308                 //                      }
2309                 //                      if ((c1 =
2310                 //                        Character.getNumericValue(source[currentPosition++]))
2311                 //                        > 15
2312                 //                        || c1 < 0
2313                 //                        || (c2 =
2314                 //                          Character.getNumericValue(source[currentPosition++]))
2315                 //                          > 15
2316                 //                        || c2 < 0
2317                 //                        || (c3 =
2318                 //                          Character.getNumericValue(source[currentPosition++]))
2319                 //                          > 15
2320                 //                        || c3 < 0
2321                 //                        || (c4 =
2322                 //                          Character.getNumericValue(source[currentPosition++]))
2323                 //                          > 15
2324                 //                        || c4 < 0) {
2325                 //                        //error don't care of the value
2326                 //                        currentCharacter = 'A';
2327                 //                      } //something different from * and /
2328                 //                      else {
2329                 //                        currentCharacter =
2330                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2331                 //                      }
2332                 //                    }
2333               }
2334             } catch (IndexOutOfBoundsException e) {
2335               return;
2336             }
2337             break;
2338           }
2339           break;
2340         }
2341         default:
2342           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2343             try {
2344               scanIdentifierOrKeyword((currentCharacter == '$'));
2345             } catch (InvalidInputException ex) {
2346             }
2347             ;
2348             break;
2349           }
2350           if (Character.isDigit(currentCharacter)) {
2351             try {
2352               scanNumber(false);
2353             } catch (InvalidInputException ex) {
2354             }
2355             ;
2356             break;
2357           }
2358         }
2359       }
2360       //-----------------end switch while try--------------------
2361     } catch (IndexOutOfBoundsException e) {
2362     } catch (InvalidInputException e) {
2363     }
2364     return;
2365   }
2366
2367   //  public final boolean jumpOverUnicodeWhiteSpace()
2368   //    throws InvalidInputException {
2369   //    //BOOLEAN
2370   //    //handle the case of unicode. Jump over the next whiteSpace
2371   //    //making startPosition pointing on the next available char
2372   //    //On false, the currentCharacter is filled up with a potential
2373   //    //correct char
2374   //
2375   //    try {
2376   //      this.wasAcr = false;
2377   //      int c1, c2, c3, c4;
2378   //      int unicodeSize = 6;
2379   //      currentPosition++;
2380   //      while (source[currentPosition] == 'u') {
2381   //        currentPosition++;
2382   //        unicodeSize++;
2383   //      }
2384   //
2385   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2386   //        || c1 < 0)
2387   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2388   //          || c2 < 0)
2389   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2390   //          || c3 < 0)
2391   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2392   //          || c4 < 0)) {
2393   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2394   //      }
2395   //
2396   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2397   //      if (recordLineSeparator
2398   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2399   //        pushLineSeparator();
2400   //      if (Character.isWhitespace(currentCharacter))
2401   //        return true;
2402   //
2403   //      //buffer the new char which is not a white space
2404   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2405   //      //withoutUnicodePtr == 1 is true here
2406   //      return false;
2407   //    } catch (IndexOutOfBoundsException e) {
2408   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2409   //    }
2410   //  }
2411   public final int[] getLineEnds() {
2412     //return a bounded copy of this.lineEnds
2413     int[] copy;
2414     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2415     return copy;
2416   }
2417
2418   public char[] getSource() {
2419     return this.source;
2420   }
2421
2422   public static boolean isIdentifierOrKeyword(int token) {
2423     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2424   }
2425
2426   final char[] optimizedCurrentTokenSource1() {
2427     //return always the same char[] build only once
2428     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2429     char charOne = source[startPosition];
2430     switch (charOne) {
2431     case 'a':
2432       return charArray_a;
2433     case 'b':
2434       return charArray_b;
2435     case 'c':
2436       return charArray_c;
2437     case 'd':
2438       return charArray_d;
2439     case 'e':
2440       return charArray_e;
2441     case 'f':
2442       return charArray_f;
2443     case 'g':
2444       return charArray_g;
2445     case 'h':
2446       return charArray_h;
2447     case 'i':
2448       return charArray_i;
2449     case 'j':
2450       return charArray_j;
2451     case 'k':
2452       return charArray_k;
2453     case 'l':
2454       return charArray_l;
2455     case 'm':
2456       return charArray_m;
2457     case 'n':
2458       return charArray_n;
2459     case 'o':
2460       return charArray_o;
2461     case 'p':
2462       return charArray_p;
2463     case 'q':
2464       return charArray_q;
2465     case 'r':
2466       return charArray_r;
2467     case 's':
2468       return charArray_s;
2469     case 't':
2470       return charArray_t;
2471     case 'u':
2472       return charArray_u;
2473     case 'v':
2474       return charArray_v;
2475     case 'w':
2476       return charArray_w;
2477     case 'x':
2478       return charArray_x;
2479     case 'y':
2480       return charArray_y;
2481     case 'z':
2482       return charArray_z;
2483     default:
2484       return new char[] { charOne };
2485     }
2486   }
2487
2488   final char[] optimizedCurrentTokenSource2() {
2489     char c0, c1;
2490     c0 = source[startPosition];
2491     c1 = source[startPosition + 1];
2492     if (c0 == '$') {
2493       //return always the same char[] build only once
2494       //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2495       switch (c1) {
2496       case 'a':
2497         return charArray_va;
2498       case 'b':
2499         return charArray_vb;
2500       case 'c':
2501         return charArray_vc;
2502       case 'd':
2503         return charArray_vd;
2504       case 'e':
2505         return charArray_ve;
2506       case 'f':
2507         return charArray_vf;
2508       case 'g':
2509         return charArray_vg;
2510       case 'h':
2511         return charArray_vh;
2512       case 'i':
2513         return charArray_vi;
2514       case 'j':
2515         return charArray_vj;
2516       case 'k':
2517         return charArray_vk;
2518       case 'l':
2519         return charArray_vl;
2520       case 'm':
2521         return charArray_vm;
2522       case 'n':
2523         return charArray_vn;
2524       case 'o':
2525         return charArray_vo;
2526       case 'p':
2527         return charArray_vp;
2528       case 'q':
2529         return charArray_vq;
2530       case 'r':
2531         return charArray_vr;
2532       case 's':
2533         return charArray_vs;
2534       case 't':
2535         return charArray_vt;
2536       case 'u':
2537         return charArray_vu;
2538       case 'v':
2539         return charArray_vv;
2540       case 'w':
2541         return charArray_vw;
2542       case 'x':
2543         return charArray_vx;
2544       case 'y':
2545         return charArray_vy;
2546       case 'z':
2547         return charArray_vz;
2548       }
2549     }
2550     //try to return the same char[] build only once
2551     int hash = ((c0 << 6) + c1) % TableSize;
2552     char[][] table = charArray_length[0][hash];
2553     int i = newEntry2;
2554     while (++i < InternalTableSize) {
2555       char[] charArray = table[i];
2556       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2557         return charArray;
2558     }
2559     //---------other side---------
2560     i = -1;
2561     int max = newEntry2;
2562     while (++i <= max) {
2563       char[] charArray = table[i];
2564       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2565         return charArray;
2566     }
2567     //--------add the entry-------
2568     if (++max >= InternalTableSize)
2569       max = 0;
2570     char[] r;
2571     table[max] = (r = new char[] { c0, c1 });
2572     newEntry2 = max;
2573     return r;
2574   }
2575
2576   final char[] optimizedCurrentTokenSource3() {
2577     //try to return the same char[] build only once
2578     char c0, c1, c2;
2579     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2580         % TableSize;
2581     char[][] table = charArray_length[1][hash];
2582     int i = newEntry3;
2583     while (++i < InternalTableSize) {
2584       char[] charArray = table[i];
2585       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2586         return charArray;
2587     }
2588     //---------other side---------
2589     i = -1;
2590     int max = newEntry3;
2591     while (++i <= max) {
2592       char[] charArray = table[i];
2593       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2594         return charArray;
2595     }
2596     //--------add the entry-------
2597     if (++max >= InternalTableSize)
2598       max = 0;
2599     char[] r;
2600     table[max] = (r = new char[] { c0, c1, c2 });
2601     newEntry3 = max;
2602     return r;
2603   }
2604
2605   final char[] optimizedCurrentTokenSource4() {
2606     //try to return the same char[] build only once
2607     char c0, c1, c2, c3;
2608     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2609         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2610         % TableSize;
2611     char[][] table = charArray_length[2][(int) hash];
2612     int i = newEntry4;
2613     while (++i < InternalTableSize) {
2614       char[] charArray = table[i];
2615       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2616         return charArray;
2617     }
2618     //---------other side---------
2619     i = -1;
2620     int max = newEntry4;
2621     while (++i <= max) {
2622       char[] charArray = table[i];
2623       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2624         return charArray;
2625     }
2626     //--------add the entry-------
2627     if (++max >= InternalTableSize)
2628       max = 0;
2629     char[] r;
2630     table[max] = (r = new char[] { c0, c1, c2, c3 });
2631     newEntry4 = max;
2632     return r;
2633   }
2634
2635   final char[] optimizedCurrentTokenSource5() {
2636     //try to return the same char[] build only once
2637     char c0, c1, c2, c3, c4;
2638     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2639         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2640         % TableSize;
2641     char[][] table = charArray_length[3][(int) hash];
2642     int i = newEntry5;
2643     while (++i < InternalTableSize) {
2644       char[] charArray = table[i];
2645       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2646         return charArray;
2647     }
2648     //---------other side---------
2649     i = -1;
2650     int max = newEntry5;
2651     while (++i <= max) {
2652       char[] charArray = table[i];
2653       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2654         return charArray;
2655     }
2656     //--------add the entry-------
2657     if (++max >= InternalTableSize)
2658       max = 0;
2659     char[] r;
2660     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2661     newEntry5 = max;
2662     return r;
2663   }
2664
2665   final char[] optimizedCurrentTokenSource6() {
2666     //try to return the same char[] build only once
2667     char c0, c1, c2, c3, c4, c5;
2668     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2669         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2670         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2671         % TableSize;
2672     char[][] table = charArray_length[4][(int) hash];
2673     int i = newEntry6;
2674     while (++i < InternalTableSize) {
2675       char[] charArray = table[i];
2676       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2677           && (c5 == charArray[5]))
2678         return charArray;
2679     }
2680     //---------other side---------
2681     i = -1;
2682     int max = newEntry6;
2683     while (++i <= max) {
2684       char[] charArray = table[i];
2685       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2686           && (c5 == charArray[5]))
2687         return charArray;
2688     }
2689     //--------add the entry-------
2690     if (++max >= InternalTableSize)
2691       max = 0;
2692     char[] r;
2693     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2694     newEntry6 = max;
2695     return r;
2696   }
2697
2698   public final void pushLineSeparator() throws InvalidInputException {
2699     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2700     final int INCREMENT = 250;
2701     if (this.checkNonExternalizedStringLiterals) {
2702       // reinitialize the current line for non externalize strings purpose
2703       currentLine = null;
2704     }
2705     //currentCharacter is at position currentPosition-1
2706     // cr 000D
2707     if (currentCharacter == '\r') {
2708       int separatorPos = currentPosition - 1;
2709       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2710         return;
2711       //System.out.println("CR-" + separatorPos);
2712       try {
2713         lineEnds[++linePtr] = separatorPos;
2714       } catch (IndexOutOfBoundsException e) {
2715         //linePtr value is correct
2716         int oldLength = lineEnds.length;
2717         int[] old = lineEnds;
2718         lineEnds = new int[oldLength + INCREMENT];
2719         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2720         lineEnds[linePtr] = separatorPos;
2721       }
2722       // look-ahead for merged cr+lf
2723       try {
2724         if (source[currentPosition] == '\n') {
2725           //System.out.println("look-ahead LF-" + currentPosition);
2726           lineEnds[linePtr] = currentPosition;
2727           currentPosition++;
2728           wasAcr = false;
2729         } else {
2730           wasAcr = true;
2731         }
2732       } catch (IndexOutOfBoundsException e) {
2733         wasAcr = true;
2734       }
2735     } else {
2736       // lf 000A
2737       if (currentCharacter == '\n') {
2738         //must merge eventual cr followed by lf
2739         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2740           //System.out.println("merge LF-" + (currentPosition - 1));
2741           lineEnds[linePtr] = currentPosition - 1;
2742         } else {
2743           int separatorPos = currentPosition - 1;
2744           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2745             return;
2746           // System.out.println("LF-" + separatorPos);
2747           try {
2748             lineEnds[++linePtr] = separatorPos;
2749           } catch (IndexOutOfBoundsException e) {
2750             //linePtr value is correct
2751             int oldLength = lineEnds.length;
2752             int[] old = lineEnds;
2753             lineEnds = new int[oldLength + INCREMENT];
2754             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2755             lineEnds[linePtr] = separatorPos;
2756           }
2757         }
2758         wasAcr = false;
2759       }
2760     }
2761   }
2762
2763   public final void pushUnicodeLineSeparator() {
2764     // isUnicode means that the \r or \n has been read as a unicode character
2765     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2766     final int INCREMENT = 250;
2767     //currentCharacter is at position currentPosition-1
2768     if (this.checkNonExternalizedStringLiterals) {
2769       // reinitialize the current line for non externalize strings purpose
2770       currentLine = null;
2771     }
2772     // cr 000D
2773     if (currentCharacter == '\r') {
2774       int separatorPos = currentPosition - 6;
2775       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2776         return;
2777       //System.out.println("CR-" + separatorPos);
2778       try {
2779         lineEnds[++linePtr] = separatorPos;
2780       } catch (IndexOutOfBoundsException e) {
2781         //linePtr value is correct
2782         int oldLength = lineEnds.length;
2783         int[] old = lineEnds;
2784         lineEnds = new int[oldLength + INCREMENT];
2785         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2786         lineEnds[linePtr] = separatorPos;
2787       }
2788       // look-ahead for merged cr+lf
2789       if (source[currentPosition] == '\n') {
2790         //System.out.println("look-ahead LF-" + currentPosition);
2791         lineEnds[linePtr] = currentPosition;
2792         currentPosition++;
2793         wasAcr = false;
2794       } else {
2795         wasAcr = true;
2796       }
2797     } else {
2798       // lf 000A
2799       if (currentCharacter == '\n') {
2800         //must merge eventual cr followed by lf
2801         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2802           //System.out.println("merge LF-" + (currentPosition - 1));
2803           lineEnds[linePtr] = currentPosition - 6;
2804         } else {
2805           int separatorPos = currentPosition - 6;
2806           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2807             return;
2808           // System.out.println("LF-" + separatorPos);
2809           try {
2810             lineEnds[++linePtr] = separatorPos;
2811           } catch (IndexOutOfBoundsException e) {
2812             //linePtr value is correct
2813             int oldLength = lineEnds.length;
2814             int[] old = lineEnds;
2815             lineEnds = new int[oldLength + INCREMENT];
2816             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2817             lineEnds[linePtr] = separatorPos;
2818           }
2819         }
2820         wasAcr = false;
2821       }
2822     }
2823   }
2824
2825   public void recordComment(int token) {
2826     // compute position
2827     int stopPosition = this.currentPosition;
2828     switch (token) {
2829     case TokenNameCOMMENT_LINE:
2830       stopPosition = -this.lastCommentLinePosition;
2831       break;
2832     case TokenNameCOMMENT_BLOCK:
2833       stopPosition = -this.currentPosition;
2834       break;
2835     }
2836
2837     // a new comment is recorded
2838     int length = this.commentStops.length;
2839     if (++this.commentPtr >= length) {
2840       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2841       //grows the positions buffers too
2842       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2843     }
2844     this.commentStops[this.commentPtr] = stopPosition;
2845     this.commentStarts[this.commentPtr] = this.startPosition;
2846   }
2847
2848   //  public final void recordComment(boolean isJavadoc) {
2849   //    // a new annotation comment is recorded
2850   //    try {
2851   //      commentStops[++commentPtr] = isJavadoc
2852   //          ? currentPosition
2853   //          : -currentPosition;
2854   //    } catch (IndexOutOfBoundsException e) {
2855   //      int oldStackLength = commentStops.length;
2856   //      int[] oldStack = commentStops;
2857   //      commentStops = new int[oldStackLength + 30];
2858   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2859   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2860   //      //grows the positions buffers too
2861   //      int[] old = commentStarts;
2862   //      commentStarts = new int[oldStackLength + 30];
2863   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2864   //    }
2865   //    //the buffer is of a correct size here
2866   //    commentStarts[commentPtr] = startPosition;
2867   //  }
2868   public void resetTo(int begin, int end) {
2869     //reset the scanner to a given position where it may rescan again
2870     diet = false;
2871     initialPosition = startPosition = currentPosition = begin;
2872     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2873     commentPtr = -1; // reset comment stack
2874   }
2875
2876   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2877     // the string with "\\u" is a legal string of two chars \ and u
2878     //thus we use a direct access to the source (for regular cases).
2879     //    if (unicodeAsBackSlash) {
2880     //      // consume next character
2881     //      unicodeAsBackSlash = false;
2882     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2883     //        && (source[currentPosition] == 'u')) {
2884     //        getNextUnicodeChar();
2885     //      } else {
2886     //        if (withoutUnicodePtr != 0) {
2887     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2888     //        }
2889     //      }
2890     //    } else
2891     currentCharacter = source[currentPosition++];
2892     switch (currentCharacter) {
2893     case '\'':
2894       currentCharacter = '\'';
2895       break;
2896     case '\\':
2897       currentCharacter = '\\';
2898       break;
2899     default:
2900       currentCharacter = '\\';
2901       currentPosition--;
2902     }
2903   }
2904
2905   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2906     // the string with "\\u" is a legal string of two chars \ and u
2907     //thus we use a direct access to the source (for regular cases).
2908     //    if (unicodeAsBackSlash) {
2909     //      // consume next character
2910     //      unicodeAsBackSlash = false;
2911     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2912     //        && (source[currentPosition] == 'u')) {
2913     //        getNextUnicodeChar();
2914     //      } else {
2915     //        if (withoutUnicodePtr != 0) {
2916     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2917     //        }
2918     //      }
2919     //    } else
2920     currentCharacter = source[currentPosition++];
2921     switch (currentCharacter) {
2922     //      case 'b' :
2923     //        currentCharacter = '\b';
2924     //        break;
2925     case 't':
2926       currentCharacter = '\t';
2927       break;
2928     case 'n':
2929       currentCharacter = '\n';
2930       break;
2931     //      case 'f' :
2932     //        currentCharacter = '\f';
2933     //        break;
2934     case 'r':
2935       currentCharacter = '\r';
2936       break;
2937     case '\"':
2938       currentCharacter = '\"';
2939       break;
2940     case '\'':
2941       currentCharacter = '\'';
2942       break;
2943     case '\\':
2944       currentCharacter = '\\';
2945       break;
2946     case '$':
2947       currentCharacter = '$';
2948       break;
2949     default:
2950       // -----------octal escape--------------
2951       // OctalDigit
2952       // OctalDigit OctalDigit
2953       // ZeroToThree OctalDigit OctalDigit
2954       int number = Character.getNumericValue(currentCharacter);
2955       if (number >= 0 && number <= 7) {
2956         boolean zeroToThreeNot = number > 3;
2957         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2958           int digit = Character.getNumericValue(currentCharacter);
2959           if (digit >= 0 && digit <= 7) {
2960             number = (number * 8) + digit;
2961             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2962               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2963                 // Digit --> ignore last character
2964                 currentPosition--;
2965               } else {
2966                 digit = Character.getNumericValue(currentCharacter);
2967                 if (digit >= 0 && digit <= 7) {
2968                   // has read \ZeroToThree OctalDigit OctalDigit
2969                   number = (number * 8) + digit;
2970                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2971                   // --> ignore last character
2972                   currentPosition--;
2973                 }
2974               }
2975             } else { // has read \OctalDigit NonDigit--> ignore last
2976               // character
2977               currentPosition--;
2978             }
2979           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2980             // character
2981             currentPosition--;
2982           }
2983         } else { // has read \OctalDigit --> ignore last character
2984           currentPosition--;
2985         }
2986         if (number > 255)
2987           throw new InvalidInputException(INVALID_ESCAPE);
2988         currentCharacter = (char) number;
2989       }
2990     //else
2991     //     throw new InvalidInputException(INVALID_ESCAPE);
2992     }
2993   }
2994
2995   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2996   //    return scanIdentifierOrKeyword( false );
2997   //  }
2998   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2999     //test keywords
3000     //first dispatch on the first char.
3001     //then the length. If there are several
3002     //keywors with the same length AND the same first char, then do another
3003     //disptach on the second char :-)...cool....but fast !
3004     useAssertAsAnIndentifier = false;
3005     while (getNextCharAsJavaIdentifierPart()) {
3006     }
3007     ;
3008     if (isVariable) {
3009       //      if (new String(getCurrentTokenSource()).equals("$this")) {
3010       //        return TokenNamethis;
3011       //      }
3012       return TokenNameVariable;
3013     }
3014     int index, length;
3015     char[] data;
3016     char firstLetter;
3017     //    if (withoutUnicodePtr == 0)
3018     //quick test on length == 1 but not on length > 12 while most identifier
3019     //have a length which is <= 12...but there are lots of identifier with
3020     //only one char....
3021     //      {
3022     if ((length = currentPosition - startPosition) == 1)
3023       return TokenNameIdentifier;
3024     //  data = source;
3025     data = new char[length];
3026     index = startPosition;
3027     for (int i = 0; i < length; i++) {
3028       data[i] = Character.toLowerCase(source[index + i]);
3029     }
3030     index = 0;
3031     //    } else {
3032     //      if ((length = withoutUnicodePtr) == 1)
3033     //        return TokenNameIdentifier;
3034     //      // data = withoutUnicodeBuffer;
3035     //      data = new char[withoutUnicodeBuffer.length];
3036     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3037     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3038     //      }
3039     //      index = 1;
3040     //    }
3041     firstLetter = data[index];
3042     switch (firstLetter) {
3043     case '_':
3044       switch (length) {
3045       case 8:
3046         //__FILE__
3047         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3048             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3049           return TokenNameFILE;
3050         index = 0; //__LINE__
3051         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3052             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3053           return TokenNameLINE;
3054         break;
3055       case 9:
3056         //__CLASS__
3057         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3058             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3059           return TokenNameCLASS_C;
3060         break;
3061       case 11:
3062         //__METHOD__
3063         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3064             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3065             && (data[++index] == '_'))
3066           return TokenNameMETHOD_C;
3067         break;
3068       case 12:
3069         //__FUNCTION__
3070         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3071             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3072             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3073           return TokenNameFUNC_C;
3074         break;
3075       }
3076       return TokenNameIdentifier;
3077     case 'a':
3078       // as and array abstract
3079       switch (length) {
3080       case 2:
3081         //as
3082         if ((data[++index] == 's')) {
3083           return TokenNameas;
3084         } else {
3085           return TokenNameIdentifier;
3086         }
3087       case 3:
3088         //and
3089         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3090           return TokenNameand;
3091         } else {
3092           return TokenNameIdentifier;
3093         }
3094       case 5:
3095         // array
3096         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3097           return TokenNamearray;
3098         else
3099           return TokenNameIdentifier;
3100       case 8:
3101         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3102             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3103           return TokenNameabstract;
3104         else
3105           return TokenNameIdentifier;
3106       default:
3107         return TokenNameIdentifier;
3108       }
3109     case 'b':
3110       //break
3111       switch (length) {
3112       case 5:
3113         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3114           return TokenNamebreak;
3115         else
3116           return TokenNameIdentifier;
3117       default:
3118         return TokenNameIdentifier;
3119       }
3120     case 'c':
3121       //case catch class clone const continue
3122       switch (length) {
3123       case 4:
3124         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3125           return TokenNamecase;
3126         else
3127           return TokenNameIdentifier;
3128       case 5:
3129         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3130           return TokenNamecatch;
3131         index = 0;
3132         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3133           return TokenNameclass;
3134         index = 0;
3135         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3136           return TokenNameclone;
3137         index = 0;
3138         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3139           return TokenNameconst;
3140         else
3141           return TokenNameIdentifier;
3142       case 8:
3143         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3144             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3145           return TokenNamecontinue;
3146         else
3147           return TokenNameIdentifier;
3148       default:
3149         return TokenNameIdentifier;
3150       }
3151     case 'd':
3152       // declare default do die
3153       // TODO delete define ==> no keyword !
3154       switch (length) {
3155       case 2:
3156         if ((data[++index] == 'o'))
3157           return TokenNamedo;
3158         else
3159           return TokenNameIdentifier;
3160       //          case 6 :
3161       //            if ((data[++index] == 'e')
3162       //              && (data[++index] == 'f')
3163       //              && (data[++index] == 'i')
3164       //              && (data[++index] == 'n')
3165       //              && (data[++index] == 'e'))
3166       //              return TokenNamedefine;
3167       //            else
3168       //              return TokenNameIdentifier;
3169       case 7:
3170         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3171             && (data[++index] == 'r') && (data[++index] == 'e'))
3172           return TokenNamedeclare;
3173         index = 0;
3174         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3175             && (data[++index] == 'l') && (data[++index] == 't'))
3176           return TokenNamedefault;
3177         else
3178           return TokenNameIdentifier;
3179       default:
3180         return TokenNameIdentifier;
3181       }
3182     case 'e':
3183       //echo else exit elseif extends eval
3184       switch (length) {
3185       case 4:
3186         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3187           return TokenNameecho;
3188         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3189           return TokenNameelse;
3190         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3191           return TokenNameexit;
3192         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3193           return TokenNameeval;
3194         else
3195           return TokenNameIdentifier;
3196       case 5:
3197         // endif empty
3198         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3199           return TokenNameendif;
3200         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3201           return TokenNameempty;
3202         else
3203           return TokenNameIdentifier;
3204       case 6:
3205         // endfor
3206         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3207             && (data[++index] == 'r'))
3208           return TokenNameendfor;
3209         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3210             && (data[++index] == 'f'))
3211           return TokenNameelseif;
3212         else
3213           return TokenNameIdentifier;
3214       case 7:
3215         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3216             && (data[++index] == 'd') && (data[++index] == 's'))
3217           return TokenNameextends;
3218         else
3219           return TokenNameIdentifier;
3220       case 8:
3221         // endwhile
3222         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3223             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3224           return TokenNameendwhile;
3225         else
3226           return TokenNameIdentifier;
3227       case 9:
3228         // endswitch
3229         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3230             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3231           return TokenNameendswitch;
3232         else
3233           return TokenNameIdentifier;
3234       case 10:
3235         // enddeclare
3236         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3237             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3238             && (data[++index] == 'e'))
3239           return TokenNameenddeclare;
3240         index = 0;
3241         if ((data[++index] == 'n') // endforeach
3242             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3243             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3244           return TokenNameendforeach;
3245         else
3246           return TokenNameIdentifier;
3247       default:
3248         return TokenNameIdentifier;
3249       }
3250     case 'f':
3251       //for false final function
3252       switch (length) {
3253       case 3:
3254         if ((data[++index] == 'o') && (data[++index] == 'r'))
3255           return TokenNamefor;
3256         else
3257           return TokenNameIdentifier;
3258       case 5:
3259         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3260         //                && (data[++index] == 's') && (data[++index] == 'e'))
3261         //              return TokenNamefalse;
3262         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3263           return TokenNamefinal;
3264         else
3265           return TokenNameIdentifier;
3266       case 7:
3267         // foreach
3268         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3269             && (data[++index] == 'c') && (data[++index] == 'h'))
3270           return TokenNameforeach;
3271         else
3272           return TokenNameIdentifier;
3273       case 8:
3274         // function
3275         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3276             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3277           return TokenNamefunction;
3278         else
3279           return TokenNameIdentifier;
3280       default:
3281         return TokenNameIdentifier;
3282       }
3283     case 'g':
3284       //global
3285       if (length == 6) {
3286         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3287             && (data[++index] == 'l')) {
3288           return TokenNameglobal;
3289         }
3290       }
3291       return TokenNameIdentifier;
3292     case 'i':
3293       //if int isset include include_once instanceof interface implements
3294       switch (length) {
3295       case 2:
3296         if (data[++index] == 'f')
3297           return TokenNameif;
3298         else
3299           return TokenNameIdentifier;
3300       //          case 3 :
3301       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3302       //              return TokenNameint;
3303       //            else
3304       //              return TokenNameIdentifier;
3305       case 5:
3306         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3307           return TokenNameisset;
3308         else
3309           return TokenNameIdentifier;
3310       case 7:
3311         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3312             && (data[++index] == 'd') && (data[++index] == 'e'))
3313           return TokenNameinclude;
3314         else
3315           return TokenNameIdentifier;
3316       case 9:
3317         // interface
3318         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3319             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3320           return TokenNameinterface;
3321         else
3322           return TokenNameIdentifier;
3323       case 10:
3324         // instanceof
3325         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3326             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3327             && (data[++index] == 'f'))
3328           return TokenNameinstanceof;
3329         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3330             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3331             && (data[++index] == 's'))
3332           return TokenNameimplements;
3333         else
3334           return TokenNameIdentifier;
3335       case 12:
3336         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3337             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3338             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3339           return TokenNameinclude_once;
3340         else
3341           return TokenNameIdentifier;
3342       default:
3343         return TokenNameIdentifier;
3344       }
3345     case 'l':
3346       //list
3347       if (length == 4) {
3348         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3349           return TokenNamelist;
3350         }
3351       }
3352       return TokenNameIdentifier;
3353     case 'n':
3354       // new null
3355       switch (length) {
3356       case 3:
3357         if ((data[++index] == 'e') && (data[++index] == 'w'))
3358           return TokenNamenew;
3359         else
3360           return TokenNameIdentifier;
3361       //          case 4 :
3362       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3363       //                && (data[++index] == 'l'))
3364       //              return TokenNamenull;
3365       //            else
3366       //              return TokenNameIdentifier;
3367       default:
3368         return TokenNameIdentifier;
3369       }
3370     case 'o':
3371       // or old_function
3372       if (length == 2) {
3373         if (data[++index] == 'r') {
3374           return TokenNameor;
3375         }
3376       }
3377       //        if (length == 12) {
3378       //          if ((data[++index] == 'l')
3379       //            && (data[++index] == 'd')
3380       //            && (data[++index] == '_')
3381       //            && (data[++index] == 'f')
3382       //            && (data[++index] == 'u')
3383       //            && (data[++index] == 'n')
3384       //            && (data[++index] == 'c')
3385       //            && (data[++index] == 't')
3386       //            && (data[++index] == 'i')
3387       //            && (data[++index] == 'o')
3388       //            && (data[++index] == 'n')) {
3389       //            return TokenNameold_function;
3390       //          }
3391       //        }
3392       return TokenNameIdentifier;
3393     case 'p':
3394       // print public private protected
3395       switch (length) {
3396       case 5:
3397         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3398           return TokenNameprint;
3399         } else
3400           return TokenNameIdentifier;
3401       case 6:
3402         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3403             && (data[++index] == 'c')) {
3404           return TokenNamepublic;
3405         } else
3406           return TokenNameIdentifier;
3407       case 7:
3408         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3409             && (data[++index] == 't') && (data[++index] == 'e')) {
3410           return TokenNameprivate;
3411         } else
3412           return TokenNameIdentifier;
3413       case 9:
3414         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3415             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3416           return TokenNameprotected;
3417         } else
3418           return TokenNameIdentifier;
3419       }
3420       return TokenNameIdentifier;
3421     case 'r':
3422       //return require require_once
3423       if (length == 6) {
3424         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3425             && (data[++index] == 'n')) {
3426           return TokenNamereturn;
3427         }
3428       } else if (length == 7) {
3429         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3430             && (data[++index] == 'r') && (data[++index] == 'e')) {
3431           return TokenNamerequire;
3432         }
3433       } else if (length == 12) {
3434         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3435             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3436             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3437           return TokenNamerequire_once;
3438         }
3439       } else
3440         return TokenNameIdentifier;
3441     case 's':
3442       //static switch
3443       switch (length) {
3444       case 6:
3445         if (data[++index] == 't')
3446           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3447             return TokenNamestatic;
3448           } else
3449             return TokenNameIdentifier;
3450         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3451             && (data[++index] == 'h'))
3452           return TokenNameswitch;
3453         else
3454           return TokenNameIdentifier;
3455       default:
3456         return TokenNameIdentifier;
3457       }
3458     case 't':
3459       // try true throw
3460       switch (length) {
3461       case 3:
3462         if ((data[++index] == 'r') && (data[++index] == 'y'))
3463           return TokenNametry;
3464         else
3465           return TokenNameIdentifier;
3466       //          case 4 :
3467       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3468       //                && (data[++index] == 'e'))
3469       //              return TokenNametrue;
3470       //            else
3471       //              return TokenNameIdentifier;
3472       case 5:
3473         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3474           return TokenNamethrow;
3475         else
3476           return TokenNameIdentifier;
3477       default:
3478         return TokenNameIdentifier;
3479       }
3480     case 'u':
3481       //use unset
3482       switch (length) {
3483       case 3:
3484         if ((data[++index] == 's') && (data[++index] == 'e'))
3485           return TokenNameuse;
3486         else
3487           return TokenNameIdentifier;
3488       case 5:
3489         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3490           return TokenNameunset;
3491         else
3492           return TokenNameIdentifier;
3493       default:
3494         return TokenNameIdentifier;
3495       }
3496     case 'v':
3497       //var
3498       switch (length) {
3499       case 3:
3500         if ((data[++index] == 'a') && (data[++index] == 'r'))
3501           return TokenNamevar;
3502         else
3503           return TokenNameIdentifier;
3504       default:
3505         return TokenNameIdentifier;
3506       }
3507     case 'w':
3508       //while
3509       switch (length) {
3510       case 5:
3511         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3512           return TokenNamewhile;
3513         else
3514           return TokenNameIdentifier;
3515       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3516       // (data[++index]=='e') && (data[++index]=='f')&&
3517       // (data[++index]=='p'))
3518       //return TokenNamewidefp ;
3519       //else
3520       //return TokenNameIdentifier;
3521       default:
3522         return TokenNameIdentifier;
3523       }
3524     case 'x':
3525       //xor
3526       switch (length) {
3527       case 3:
3528         if ((data[++index] == 'o') && (data[++index] == 'r'))
3529           return TokenNamexor;
3530         else
3531           return TokenNameIdentifier;
3532       default:
3533         return TokenNameIdentifier;
3534       }
3535     default:
3536       return TokenNameIdentifier;
3537     }
3538   }
3539
  /**
   * Scans the remainder of a numeric literal and classifies it as an integer or a double literal.
   * <p>
   * On entry <code>currentCharacter</code> is the first digit of the number; when <code>dotPrefix</code> is true
   * that digit was preceded by a '.', so the literal is floating point from the start.
   * <p>
   * Recognized forms, in the order they are tried: a "0x"/"0X" hexadecimal literal, a literal with a leading zero
   * (potential octal, or a float such as 000099.0), and a plain decimal literal with an optional fraction and
   * exponent. A trailing 'd'/'D' suffix is consumed and makes the literal a double.
   *
   * @param dotPrefix
   *          true when the first digit was preceded by a '.'
   * @return <code>TokenNameIntegerLiteral</code> or <code>TokenNameDoubleLiteral</code>
   * @throws InvalidInputException
   *           with <code>INVALID_HEXA</code> when "0x" is not followed by a hexadecimal digit, or with
   *           <code>INVALID_FLOAT</code> when an exponent marker is not followed by a digit
   */
  public int scanNumber(boolean dotPrefix) throws InvalidInputException {
    // a leading '.' already makes the literal floating point
    boolean floating = dotPrefix;
    if ((!dotPrefix) && (currentCharacter == '0')) {
      if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
        // force the first character of the hexadecimal number to exist:
        // read the next character directly from the source
        // (the unicode-escape handling that used to live here is disabled)
        unicodeAsBackSlash = false;
        currentCharacter = source[currentPosition++];
        if (Character.digit(currentCharacter, 16) == -1)
          throw new InvalidInputException(INVALID_HEXA);
        //---end forcing--
        // consume the remaining hexadecimal digits
        while (getNextCharAsDigit(16)) {
        }
        ;
        // (the 'l'/'L' long-literal suffix check is disabled)
        return TokenNameIntegerLiteral;
      }
      // there is no x or X in the number
      // potential octal ! ... someone may write 000099.0 ! thus 00100 <
      // 00078.0 is true !!!!! crazy language
      if (getNextCharAsDigit()) {
        //-------------potential octal-----------------
        while (getNextCharAsDigit()) {
        }
        ;
        // (the 'l'/'L' and 'f'/'F' suffix checks are disabled)
        if (getNextChar('d', 'D') >= 0) {
          return TokenNameDoubleLiteral;
        } else { // make the distinction between octal and float ....
          if (getNextChar('.')) { // a '.' follows the digits: this is a float
            // fraction digits
            while (getNextCharAsDigit()) {
            }
            ;
            if (getNextChar('e', 'E') >= 0) {
              // exponent: read the character after the marker directly from the source
              unicodeAsBackSlash = false;
              currentCharacter = source[currentPosition++];
              if ((currentCharacter == '-') || (currentCharacter == '+')) {
                // optional sign: skip it and read the next character
                unicodeAsBackSlash = false;
                currentCharacter = source[currentPosition++];
              }
              // at least one exponent digit is mandatory
              if (!Character.isDigit(currentCharacter))
                throw new InvalidInputException(INVALID_FLOAT);
              while (getNextCharAsDigit()) {
              }
              ;
            }
            // (the 'f'/'F' suffix check is disabled)
            getNextChar('d', 'D'); // jump over potential d or D
            return TokenNameDoubleLiteral;
          } else {
            return TokenNameIntegerLiteral;
          }
        }
      } else {
        /* a lone '0' not followed by a digit: carry on with the generic decimal scan below */
      }
    }
    // integer part (or, with dotPrefix, the fraction digits)
    while (getNextCharAsDigit()) {
    }
    ;
    // (the 'l'/'L' long-literal suffix check is disabled)
    if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
      while (getNextCharAsDigit()) {
      }
      ;
      floating = true;
    }
    // if floating is true both exponent and suffix are optional
    if (getNextChar('e', 'E') >= 0) {
      floating = true;
      // read the character after the exponent marker directly from the source
      unicodeAsBackSlash = false;
      currentCharacter = source[currentPosition++];
      if ((currentCharacter == '-') || (currentCharacter == '+')) {
        // optional sign: skip it and read the next character
        unicodeAsBackSlash = false;
        currentCharacter = source[currentPosition++];
      }
      // at least one exponent digit is mandatory
      if (!Character.isDigit(currentCharacter))
        throw new InvalidInputException(INVALID_FLOAT);
      while (getNextCharAsDigit()) {
      }
      ;
    }
    if (getNextChar('d', 'D') >= 0)
      return TokenNameDoubleLiteral;
    // (the 'f'/'F' suffix check is disabled)
    return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
  }
3688
3689   /**
3690    * Search the line number corresponding to a specific position
3691    *
3692    */
3693   public final int getLineNumber(int position) {
3694     if (lineEnds == null)
3695       return 1;
3696     int length = linePtr + 1;
3697     if (length == 0)
3698       return 1;
3699     int g = 0, d = length - 1;
3700     int m = 0;
3701     while (g <= d) {
3702       m = (g + d) / 2;
3703       if (position < lineEnds[m]) {
3704         d = m - 1;
3705       } else if (position > lineEnds[m]) {
3706         g = m + 1;
3707       } else {
3708         return m + 1;
3709       }
3710     }
3711     if (position < lineEnds[m]) {
3712       return m + 1;
3713     }
3714     return m + 2;
3715   }
3716
3717   public void setPHPMode(boolean mode) {
3718     phpMode = mode;
3719   }
3720
3721   public final void setSource(char[] source) {
3722     setSource(null, source);
3723   }
3724
3725   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3726     //the source-buffer is set to sourceString
3727     this.compilationUnit = compilationUnit;
3728     if (source == null) {
3729       this.source = new char[0];
3730     } else {
3731       this.source = source;
3732     }
3733     startPosition = -1;
3734     initialPosition = currentPosition = 0;
3735     containsAssertKeyword = false;
3736     withoutUnicodeBuffer = new char[this.source.length];
3737     encapsedStringStack = new Stack();
3738   }
3739
3740   public String toString() {
3741     if (startPosition == source.length)
3742       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3743     if (currentPosition > source.length)
3744       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3745     char front[] = new char[startPosition];
3746     System.arraycopy(source, 0, front, 0, startPosition);
3747     int middleLength = (currentPosition - 1) - startPosition + 1;
3748     char middle[];
3749     if (middleLength > -1) {
3750       middle = new char[middleLength];
3751       System.arraycopy(source, startPosition, middle, 0, middleLength);
3752     } else {
3753       middle = new char[0];
3754     }
3755     char end[] = new char[source.length - (currentPosition - 1)];
3756     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3757     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3758         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3759         + new String(end);
3760   }
3761
3762   public final String toStringAction(int act) {
3763     switch (act) {
3764     case TokenNameERROR:
3765       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3766     // //$NON-NLS-1$
3767     case TokenNameINLINE_HTML:
3768       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3769     case TokenNameIdentifier:
3770       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3771     case TokenNameVariable:
3772       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3773     case TokenNameabstract:
3774       return "abstract"; //$NON-NLS-1$
3775     case TokenNameand:
3776       return "AND"; //$NON-NLS-1$
3777     case TokenNamearray:
3778       return "array"; //$NON-NLS-1$
3779     case TokenNameas:
3780       return "as"; //$NON-NLS-1$
3781     case TokenNamebreak:
3782       return "break"; //$NON-NLS-1$
3783     case TokenNamecase:
3784       return "case"; //$NON-NLS-1$
3785     case TokenNameclass:
3786       return "class"; //$NON-NLS-1$
3787     case TokenNamecatch:
3788       return "catch"; //$NON-NLS-1$
3789     case TokenNameclone:
3790       //$NON-NLS-1$
3791       return "clone";
3792     case TokenNameconst:
3793       //$NON-NLS-1$
3794       return "const";
3795     case TokenNamecontinue:
3796       return "continue"; //$NON-NLS-1$
3797     case TokenNamedefault:
3798       return "default"; //$NON-NLS-1$
3799     //      case TokenNamedefine :
3800     //        return "define"; //$NON-NLS-1$
3801     case TokenNamedo:
3802       return "do"; //$NON-NLS-1$
3803     case TokenNameecho:
3804       return "echo"; //$NON-NLS-1$
3805     case TokenNameelse:
3806       return "else"; //$NON-NLS-1$
3807     case TokenNameelseif:
3808       return "elseif"; //$NON-NLS-1$
3809     case TokenNameendfor:
3810       return "endfor"; //$NON-NLS-1$
3811     case TokenNameendforeach:
3812       return "endforeach"; //$NON-NLS-1$
3813     case TokenNameendif:
3814       return "endif"; //$NON-NLS-1$
3815     case TokenNameendswitch:
3816       return "endswitch"; //$NON-NLS-1$
3817     case TokenNameendwhile:
3818       return "endwhile"; //$NON-NLS-1$
3819     case TokenNameexit:
3820       return "exit";
3821     case TokenNameextends:
3822       return "extends"; //$NON-NLS-1$
3823     //      case TokenNamefalse :
3824     //        return "false"; //$NON-NLS-1$
3825     case TokenNamefinal:
3826       return "final"; //$NON-NLS-1$
3827     case TokenNamefor:
3828       return "for"; //$NON-NLS-1$
3829     case TokenNameforeach:
3830       return "foreach"; //$NON-NLS-1$
3831     case TokenNamefunction:
3832       return "function"; //$NON-NLS-1$
3833     case TokenNameglobal:
3834       return "global"; //$NON-NLS-1$
3835     case TokenNameif:
3836       return "if"; //$NON-NLS-1$
3837     case TokenNameimplements:
3838       return "implements"; //$NON-NLS-1$
3839     case TokenNameinclude:
3840       return "include"; //$NON-NLS-1$
3841     case TokenNameinclude_once:
3842       return "include_once"; //$NON-NLS-1$
3843     case TokenNameinstanceof:
3844       return "instanceof"; //$NON-NLS-1$
3845     case TokenNameinterface:
3846       return "interface"; //$NON-NLS-1$
3847     case TokenNameisset:
3848       return "isset"; //$NON-NLS-1$
3849     case TokenNamelist:
3850       return "list"; //$NON-NLS-1$
3851     case TokenNamenew:
3852       return "new"; //$NON-NLS-1$
3853     //      case TokenNamenull :
3854     //        return "null"; //$NON-NLS-1$
3855     case TokenNameor:
3856       return "OR"; //$NON-NLS-1$
3857     case TokenNameprint:
3858       return "print"; //$NON-NLS-1$
3859     case TokenNameprivate:
3860       return "private"; //$NON-NLS-1$
3861     case TokenNameprotected:
3862       return "protected"; //$NON-NLS-1$
3863     case TokenNamepublic:
3864       return "public"; //$NON-NLS-1$
3865     case TokenNamerequire:
3866       return "require"; //$NON-NLS-1$
3867     case TokenNamerequire_once:
3868       return "require_once"; //$NON-NLS-1$
3869     case TokenNamereturn:
3870       return "return"; //$NON-NLS-1$
3871     case TokenNamestatic:
3872       return "static"; //$NON-NLS-1$
3873     case TokenNameswitch:
3874       return "switch"; //$NON-NLS-1$
3875     //      case TokenNametrue :
3876     //        return "true"; //$NON-NLS-1$
3877     case TokenNameunset:
3878       return "unset"; //$NON-NLS-1$
3879     case TokenNamevar:
3880       return "var"; //$NON-NLS-1$
3881     case TokenNamewhile:
3882       return "while"; //$NON-NLS-1$
3883     case TokenNamexor:
3884       return "XOR"; //$NON-NLS-1$
3885     //      case TokenNamethis :
3886     //        return "$this"; //$NON-NLS-1$
3887     case TokenNameIntegerLiteral:
3888       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3889     case TokenNameDoubleLiteral:
3890       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3891     case TokenNameStringDoubleQuote:
3892       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3893     case TokenNameStringSingleQuote:
3894       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3895     case TokenNameStringInterpolated:
3896       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3897     case TokenNameEncapsedString0:
3898       return "`"; //$NON-NLS-1$
3899     case TokenNameEncapsedString1:
3900       return "\'"; //$NON-NLS-1$
3901     case TokenNameEncapsedString2:
3902       return "\""; //$NON-NLS-1$
3903     case TokenNameSTRING:
3904       return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3905     case TokenNameHEREDOC:
3906       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3907     case TokenNamePLUS_PLUS:
3908       return "++"; //$NON-NLS-1$
3909     case TokenNameMINUS_MINUS:
3910       return "--"; //$NON-NLS-1$
3911     case TokenNameEQUAL_EQUAL:
3912       return "=="; //$NON-NLS-1$
3913     case TokenNameEQUAL_EQUAL_EQUAL:
3914       return "==="; //$NON-NLS-1$
3915     case TokenNameEQUAL_GREATER:
3916       return "=>"; //$NON-NLS-1$
3917     case TokenNameLESS_EQUAL:
3918       return "<="; //$NON-NLS-1$
3919     case TokenNameGREATER_EQUAL:
3920       return ">="; //$NON-NLS-1$
3921     case TokenNameNOT_EQUAL:
3922       return "!="; //$NON-NLS-1$
3923     case TokenNameNOT_EQUAL_EQUAL:
3924       return "!=="; //$NON-NLS-1$
3925     case TokenNameLEFT_SHIFT:
3926       return "<<"; //$NON-NLS-1$
3927     case TokenNameRIGHT_SHIFT:
3928       return ">>"; //$NON-NLS-1$
3929     case TokenNamePLUS_EQUAL:
3930       return "+="; //$NON-NLS-1$
3931     case TokenNameMINUS_EQUAL:
3932       return "-="; //$NON-NLS-1$
3933     case TokenNameMULTIPLY_EQUAL:
3934       return "*="; //$NON-NLS-1$
3935     case TokenNameDIVIDE_EQUAL:
3936       return "/="; //$NON-NLS-1$
3937     case TokenNameAND_EQUAL:
3938       return "&="; //$NON-NLS-1$
3939     case TokenNameOR_EQUAL:
3940       return "|="; //$NON-NLS-1$
3941     case TokenNameXOR_EQUAL:
3942       return "^="; //$NON-NLS-1$
3943     case TokenNameREMAINDER_EQUAL:
3944       return "%="; //$NON-NLS-1$
3945     case TokenNameDOT_EQUAL:
3946       return ".="; //$NON-NLS-1$
3947     case TokenNameLEFT_SHIFT_EQUAL:
3948       return "<<="; //$NON-NLS-1$
3949     case TokenNameRIGHT_SHIFT_EQUAL:
3950       return ">>="; //$NON-NLS-1$
3951     case TokenNameOR_OR:
3952       return "||"; //$NON-NLS-1$
3953     case TokenNameAND_AND:
3954       return "&&"; //$NON-NLS-1$
3955     case TokenNamePLUS:
3956       return "+"; //$NON-NLS-1$
3957     case TokenNameMINUS:
3958       return "-"; //$NON-NLS-1$
3959     case TokenNameMINUS_GREATER:
3960       return "->";
3961     case TokenNameNOT:
3962       return "!"; //$NON-NLS-1$
3963     case TokenNameREMAINDER:
3964       return "%"; //$NON-NLS-1$
3965     case TokenNameXOR:
3966       return "^"; //$NON-NLS-1$
3967     case TokenNameAND:
3968       return "&"; //$NON-NLS-1$
3969     case TokenNameMULTIPLY:
3970       return "*"; //$NON-NLS-1$
3971     case TokenNameOR:
3972       return "|"; //$NON-NLS-1$
3973     case TokenNameTWIDDLE:
3974       return "~"; //$NON-NLS-1$
3975     case TokenNameTWIDDLE_EQUAL:
3976       return "~="; //$NON-NLS-1$
3977     case TokenNameDIVIDE:
3978       return "/"; //$NON-NLS-1$
3979     case TokenNameGREATER:
3980       return ">"; //$NON-NLS-1$
3981     case TokenNameLESS:
3982       return "<"; //$NON-NLS-1$
3983     case TokenNameLPAREN:
3984       return "("; //$NON-NLS-1$
3985     case TokenNameRPAREN:
3986       return ")"; //$NON-NLS-1$
3987     case TokenNameLBRACE:
3988       return "{"; //$NON-NLS-1$
3989     case TokenNameRBRACE:
3990       return "}"; //$NON-NLS-1$
3991     case TokenNameLBRACKET:
3992       return "["; //$NON-NLS-1$
3993     case TokenNameRBRACKET:
3994       return "]"; //$NON-NLS-1$
3995     case TokenNameSEMICOLON:
3996       return ";"; //$NON-NLS-1$
3997     case TokenNameQUESTION:
3998       return "?"; //$NON-NLS-1$
3999     case TokenNameCOLON:
4000       return ":"; //$NON-NLS-1$
4001     case TokenNameCOMMA:
4002       return ","; //$NON-NLS-1$
4003     case TokenNameDOT:
4004       return "."; //$NON-NLS-1$
4005     case TokenNameEQUAL:
4006       return "="; //$NON-NLS-1$
4007     case TokenNameAT:
4008       return "@";
4009     case TokenNameDOLLAR:
4010       return "$";
4011     case TokenNameDOLLAR_LBRACE:
4012       return "${";
4013     case TokenNameLBRACE_DOLLAR:
4014       return "{$";
4015     case TokenNameEOF:
4016       return "EOF"; //$NON-NLS-1$
4017     case TokenNameWHITESPACE:
4018       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4019     case TokenNameCOMMENT_LINE:
4020       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4021     case TokenNameCOMMENT_BLOCK:
4022       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4023     case TokenNameCOMMENT_PHPDOC:
4024       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4025     //      case TokenNameHTML :
4026     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
4027     // //$NON-NLS-1$
4028     case TokenNameFILE:
4029       return "__FILE__"; //$NON-NLS-1$
4030     case TokenNameLINE:
4031       return "__LINE__"; //$NON-NLS-1$
4032     case TokenNameCLASS_C:
4033       return "__CLASS__"; //$NON-NLS-1$
4034     case TokenNameMETHOD_C:
4035       return "__METHOD__"; //$NON-NLS-1$
4036     case TokenNameFUNC_C:
4037       return "__FUNCTION__"; //$NON-NLS-1
4038     case TokenNameboolCAST:
4039       return "( bool )"; //$NON-NLS-1$
4040     case TokenNameintCAST:
4041       return "( int )"; //$NON-NLS-1$
4042     case TokenNamedoubleCAST:
4043       return "( double )"; //$NON-NLS-1$
4044     case TokenNameobjectCAST:
4045       return "( object )"; //$NON-NLS-1$
4046     case TokenNamestringCAST:
4047       return "( string )"; //$NON-NLS-1$
4048     default:
4049       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4050     }
4051   }
4052
  /**
   * Creates a scanner with both <code>tokenizeComments</code> and <code>tokenizeWhiteSpace</code> switched off.
   */
  public Scanner() {
    this(false, false);
  }
4056
  /**
   * Creates a scanner with <code>checkNonExternalizedStringLiterals</code> switched off.
   *
   * @param tokenizeComments
   *          value for the <code>tokenizeComments</code> flag
   * @param tokenizeWhiteSpace
   *          value for the <code>tokenizeWhiteSpace</code> flag
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
  }
4060
  /**
   * Creates a scanner with <code>assertMode</code> switched off.
   *
   * @param tokenizeComments
   *          value for the <code>tokenizeComments</code> flag
   * @param tokenizeWhiteSpace
   *          value for the <code>tokenizeWhiteSpace</code> flag
   * @param checkNonExternalizedStringLiterals
   *          value for the <code>checkNonExternalizedStringLiterals</code> flag
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
    this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
  }
4064
  /**
   * Creates a scanner without string tokenizing and without task tags or task priorities; the task-tag case
   * sensitivity argument passed on is <code>true</code>.
   *
   * @param tokenizeComments
   *          value for the <code>tokenizeComments</code> flag
   * @param tokenizeWhiteSpace
   *          value for the <code>tokenizeWhiteSpace</code> flag
   * @param checkNonExternalizedStringLiterals
   *          value for the <code>checkNonExternalizedStringLiterals</code> flag
   * @param assertMode
   *          value for the <code>assertMode</code> flag
   */
  public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
      boolean assertMode) {
    this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
  }
4069
4070   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4071       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4072     this.eofPosition = Integer.MAX_VALUE;
4073     this.tokenizeComments = tokenizeComments;
4074     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4075     this.tokenizeStrings = tokenizeStrings;
4076     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4077     this.assertMode = assertMode;
4078     this.encapsedStringStack = null;
4079     this.taskTags = taskTags;
4080     this.taskPriorities = taskPriorities;
4081   }
4082
4083   private void checkNonExternalizeString() throws InvalidInputException {
4084     if (currentLine == null)
4085       return;
4086     parseTags(currentLine);
4087   }
4088
4089   private void parseTags(NLSLine line) throws InvalidInputException {
4090     String s = new String(getCurrentTokenSource());
4091     int pos = s.indexOf(TAG_PREFIX);
4092     int lineLength = line.size();
4093     while (pos != -1) {
4094       int start = pos + TAG_PREFIX_LENGTH;
4095       int end = s.indexOf(TAG_POSTFIX, start);
4096       String index = s.substring(start, end);
4097       int i = 0;
4098       try {
4099         i = Integer.parseInt(index) - 1;
4100         // Tags are one based not zero based.
4101       } catch (NumberFormatException e) {
4102         i = -1; // we don't want to consider this as a valid NLS tag
4103       }
4104       if (line.exists(i)) {
4105         line.set(i, null);
4106       }
4107       pos = s.indexOf(TAG_PREFIX, start);
4108     }
4109     this.nonNLSStrings = new StringLiteral[lineLength];
4110     int nonNLSCounter = 0;
4111     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4112       StringLiteral literal = (StringLiteral) iterator.next();
4113       if (literal != null) {
4114         this.nonNLSStrings[nonNLSCounter++] = literal;
4115       }
4116     }
4117     if (nonNLSCounter == 0) {
4118       this.nonNLSStrings = null;
4119       currentLine = null;
4120       return;
4121     }
4122     this.wasNonExternalizedStringLiteral = true;
4123     if (nonNLSCounter != lineLength) {
4124       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4125     }
4126     currentLine = null;
4127   }
4128
  /**
   * Translates the escape sequence following a backslash into the character it denotes and stores the result in
   * <code>currentCharacter</code>.
   * <p>
   * Handles the single-letter escapes b, t, n, f, r, the escaped quotes and backslash, and octal escapes of one
   * to three digits (three digits only when the first one is 0 to 3).
   *
   * @throws InvalidInputException
   *           with <code>INVALID_ESCAPE</code> when the character is not a known escape, or when the octal value
   *           exceeds 255
   */
  public final void scanEscapeCharacter() throws InvalidInputException {
    // the string with "\\u" is a legal string of two chars \ and u
    // thus we use a direct access to the source (for regular cases).
    if (unicodeAsBackSlash) {
      // currentCharacter is kept as it is; only the flag is reset
      // (the unicode-escape handling that used to live here is disabled)
      unicodeAsBackSlash = false;
      if (withoutUnicodePtr != 0) {
        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
      }
    } else
      // regular case: the escaped character is the next source character
      currentCharacter = source[currentPosition++];
    switch (currentCharacter) {
    case 'b':
      currentCharacter = '\b';
      break;
    case 't':
      currentCharacter = '\t';
      break;
    case 'n':
      currentCharacter = '\n';
      break;
    case 'f':
      currentCharacter = '\f';
      break;
    case 'r':
      currentCharacter = '\r';
      break;
    case '\"':
      currentCharacter = '\"';
      break;
    case '\'':
      currentCharacter = '\'';
      break;
    case '\\':
      currentCharacter = '\\';
      break;
    default:
      // -----------octal escape--------------
      // OctalDigit
      // OctalDigit OctalDigit
      // ZeroToThree OctalDigit OctalDigit
      int number = Character.getNumericValue(currentCharacter);
      if (number >= 0 && number <= 7) {
        // true when the first digit is 4..7, in which case at most two digits are read
        boolean zeroToThreeNot = number > 3;
        // each look-ahead below consumes one character and un-reads it when it does not belong to the escape
        if (Character.isDigit(currentCharacter = source[currentPosition++])) {
          int digit = Character.getNumericValue(currentCharacter);
          if (digit >= 0 && digit <= 7) {
            number = (number * 8) + digit;
            if (Character.isDigit(currentCharacter = source[currentPosition++])) {
              if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
                // Digit --> ignore last character
                currentPosition--;
              } else {
                digit = Character.getNumericValue(currentCharacter);
                if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
                  // OctalDigit OctalDigit
                  number = (number * 8) + digit;
                } else { // has read \ZeroToThree OctalDigit NonOctalDigit
                  // --> ignore last character
                  currentPosition--;
                }
              }
            } else { // has read \OctalDigit OctalDigit NonDigit --> ignore last
              // character
              currentPosition--;
            }
          } else { // has read \OctalDigit NonOctalDigit --> ignore last
            // character
            currentPosition--;
          }
        } else { // has read \OctalDigit NonDigit --> ignore last character
          currentPosition--;
        }
        if (number > 255)
          throw new InvalidInputException(INVALID_ESCAPE);
        currentCharacter = (char) number;
      } else
        throw new InvalidInputException(INVALID_ESCAPE);
    }
  }
4214
4215   //chech presence of task: tags
4216   //TODO (frederic) see if we need to take unicode characters into account...
4217   public void checkTaskTag(int commentStart, int commentEnd) {
4218     char[] src = this.source;
4219
4220     // only look for newer task: tags
4221     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4222       return;
4223     }
4224     int foundTaskIndex = this.foundTaskCount;
4225     char previous = src[commentStart + 1]; // should be '*' or '/'
4226     nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4227       char[] tag = null;
4228       char[] priority = null;
4229       // check for tag occurrence only if not ambiguous with javadoc tag
4230       if (previous != '@') {
4231         nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4232           tag = this.taskTags[itag];
4233           int tagLength = tag.length;
4234           if (tagLength == 0)
4235             continue nextTag;
4236
4237           // ensure tag is not leaded with letter if tag starts with a letter
4238           if (Character.isJavaIdentifierStart(tag[0])) {
4239             if (Character.isJavaIdentifierPart(previous)) {
4240               continue nextTag;
4241             }
4242           }
4243
4244           for (int t = 0; t < tagLength; t++) {
4245             char sc, tc;
4246             int x = i + t;
4247             if (x >= this.eofPosition || x >= commentEnd)
4248               continue nextTag;
4249             if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4250               if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4251                 continue nextTag;
4252               }
4253             }
4254           }
4255           // ensure tag is not followed with letter if tag finishes with a letter
4256           if (i + tagLength < commentEnd && Character.isJavaIdentifierPart(src[i + tagLength - 1])) {
4257             if (Character.isJavaIdentifierPart(src[i + tagLength]))
4258               continue nextTag;
4259           }
4260           if (this.foundTaskTags == null) {
4261             this.foundTaskTags = new char[5][];
4262             this.foundTaskMessages = new char[5][];
4263             this.foundTaskPriorities = new char[5][];
4264             this.foundTaskPositions = new int[5][];
4265           } else if (this.foundTaskCount == this.foundTaskTags.length) {
4266             System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4267                 this.foundTaskCount);
4268             System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4269                 this.foundTaskCount);
4270             System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4271                 this.foundTaskCount);
4272             System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4273                 this.foundTaskCount);
4274           }
4275
4276           priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4277
4278           this.foundTaskTags[this.foundTaskCount] = tag;
4279           this.foundTaskPriorities[this.foundTaskCount] = priority;
4280           this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4281           this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4282           this.foundTaskCount++;
4283           i += tagLength - 1; // will be incremented when looping
4284           break nextTag;
4285         }
4286       }
4287       previous = src[i];
4288     }
4289     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4290       // retrieve message start and end positions
4291       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4292       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4293       // at most beginning of next task
4294       if (max_value < msgStart) {
4295         max_value = msgStart; // would only occur if tag is before EOF.
4296       }
4297       int end = -1;
4298       char c;
4299       for (int j = msgStart; j < max_value; j++) {
4300         if ((c = src[j]) == '\n' || c == '\r') {
4301           end = j - 1;
4302           break;
4303         }
4304       }
4305       if (end == -1) {
4306         for (int j = max_value; j > msgStart; j--) {
4307           if ((c = src[j]) == '*') {
4308             end = j - 1;
4309             break;
4310           }
4311         }
4312         if (end == -1)
4313           end = max_value;
4314       }
4315       if (msgStart == end)
4316         continue; // empty
4317       // trim the message
4318       while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4319         end--;
4320       while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4321         msgStart++;
4322       // update the end position of the task
4323       this.foundTaskPositions[i][1] = end;
4324       // get the message source
4325       final int messageLength = end - msgStart + 1;
4326       char[] message = new char[messageLength];
4327       System.arraycopy(src, msgStart, message, 0, messageLength);
4328       this.foundTaskMessages[i] = message;
4329     }
4330   }
4331
4332   // check presence of task: tags
4333   //  public void checkTaskTag(int commentStart, int commentEnd) {
4334   //    // only look for newer task: tags
4335   //    if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4336   //      return;
4337   //    }
4338   //    int foundTaskIndex = this.foundTaskCount;
4339   //    nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4340   //      char[] tag = null;
4341   //      char[] priority = null;
4342   //      // check for tag occurrence
4343   //      nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4344   //        tag = this.taskTags[itag];
4345   //        priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4346   //        int tagLength = tag.length;
4347   //        for (int t = 0; t < tagLength; t++) {
4348   //          if (this.source[i + t] != tag[t])
4349   //            continue nextTag;
4350   //        }
4351   //        if (this.foundTaskTags == null) {
4352   //          this.foundTaskTags = new char[5][];
4353   //          this.foundTaskMessages = new char[5][];
4354   //          this.foundTaskPriorities = new char[5][];
4355   //          this.foundTaskPositions = new int[5][];
4356   //        } else if (this.foundTaskCount == this.foundTaskTags.length) {
4357   //          System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4358   //          System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4359   //              this.foundTaskCount);
4360   //          System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4361   //              this.foundTaskCount);
4362   //          System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4363   //              this.foundTaskCount);
4364   //        }
4365   //        this.foundTaskTags[this.foundTaskCount] = tag;
4366   //        this.foundTaskPriorities[this.foundTaskCount] = priority;
4367   //        this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4368   //        this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4369   //        this.foundTaskCount++;
4370   //        i += tagLength - 1; // will be incremented when looping
4371   //      }
4372   //    }
4373   //    for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4374   //      // retrieve message start and end positions
4375   //      int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4376   //      int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4377   //      // at most beginning of next task
4378   //      if (max_value < msgStart)
4379   //        max_value = msgStart; // would only occur if tag is before EOF.
4380   //      int end = -1;
4381   //      char c;
4382   //      for (int j = msgStart; j < max_value; j++) {
4383   //        if ((c = this.source[j]) == '\n' || c == '\r') {
4384   //          end = j - 1;
4385   //          break;
4386   //        }
4387   //      }
4388   //      if (end == -1) {
4389   //        for (int j = max_value; j > msgStart; j--) {
4390   //          if ((c = this.source[j]) == '*') {
4391   //            end = j - 1;
4392   //            break;
4393   //          }
4394   //        }
4395   //        if (end == -1)
4396   //          end = max_value;
4397   //      }
4398   //      if (msgStart == end)
4399   //        continue; // empty
4400   //      // trim the message
4401   //      while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4402   //        end--;
4403   //      while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4404   //        msgStart++;
4405   //      // update the end position of the task
4406   //      this.foundTaskPositions[i][1] = end;
4407   //      // get the message source
4408   //      final int messageLength = end - msgStart + 1;
4409   //      char[] message = new char[messageLength];
4410   //      System.arraycopy(source, msgStart, message, 0, messageLength);
4411   //      this.foundTaskMessages[i] = message;
4412   //    }
4413   //  }
4414 }