net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13 import java.util.Stack;
  14
  15 import net.sourceforge.phpdt.core.compiler.CharOperation;
  16 import net.sourceforge.phpdt.core.compiler.IScanner;
  17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  21
  22 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The APIs are:
   * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the
   *   corresponding chars);
   * - sourceStart gives the start position in the stream;
   * - currentPosition-1 gives the sourceEnd position in the stream.
   */
  28   // 1.4 feature
  29   private boolean assertMode;
  30
  31   public boolean useAssertAsAnIndentifier = false;
  32
  33   //flag indicating if processed source contains occurrences of keyword assert
  34   public boolean containsAssertKeyword = false;
  35
  36   public boolean recordLineSeparator;
  37
  38   public boolean ignorePHPOneLiner = false;
  39
  40   public boolean phpMode = false;
  41
  42   public boolean phpExpressionTag = false;
  43
  44 //  public Stack encapsedStringStack = null;
  45
  46   public char currentCharacter;
  47
  48   public int startPosition;
  49
  50   public int currentPosition;
  51
  52   public int initialPosition, eofPosition;
  53
  // (presumably about eofPosition above) after this position, EOF is generated instead of real tokens from the
  // source
  56   public boolean tokenizeComments;
  57
  58   public boolean tokenizeWhiteSpace;
  59
  60   public boolean tokenizeStrings;
  61
  // source should be viewed as a window (i.e. a part)
  // of an entire very large stream
  64   public char source[];
  65
  66   //unicode support
  67   public char[] withoutUnicodeBuffer;
  68
  69   public int withoutUnicodePtr;
  70
  71   //when == 0 ==> no unicode in the current token
  72   public boolean unicodeAsBackSlash = false;
  73
  74   public boolean scanningFloatLiteral = false;
  75
  76   //support for /** comments
  77   public int[] commentStops = new int[10];
  78
  79   public int[] commentStarts = new int[10];
  80
  81   public int commentPtr = -1; // no comment test with commentPtr value -1
  82
  83   protected int lastCommentLinePosition = -1;
  84
  85   //diet parsing support - jump over some method body when requested
  86   public boolean diet = false;
  87
  88   //support for the poor-line-debuggers ....
  89   //remember the position of the cr/lf
  90   public int[] lineEnds = new int[250];
  91
  92   public int linePtr = -1;
  93
  94   public boolean wasAcr = false;
  95
  96   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  97
  98   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
  99
 100   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
 101
 102   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 103
 104   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 105
 106   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 107
 108   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 109
 110   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 111
 112   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 113
 114   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 115
 116   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 117
 118   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 119
  //----------------optimized identifier management------------------
 121   static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 122       charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 123       charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 124       charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 125       charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 126       charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 127       charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 128       charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 129       charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 130
 131   static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
 132       '$',
 133       'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
 134       charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
 135       charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
 136       charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
 137       charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
 138       charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
 139       charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
 140       charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
 141
 142   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 143
 144   static final int TableSize = 30, InternalTableSize = 6;
 145
 146   //30*6 = 180 entries
 147   public static final int OptimizedLength = 6;
 148
 149   public/* static */
 150   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 151
 152   // support for detecting non-externalized string literals
 153   int currentLineNr = -1;
 154
 155   int previousLineNr = -1;
 156
 157   NLSLine currentLine = null;
 158
 159   List lines = new ArrayList();
 160
 161   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 162
 163   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 164
 165   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 166
 167   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 168
 169   public StringLiteral[] nonNLSStrings = null;
 170
 171   public boolean checkNonExternalizedStringLiterals = true;
 172
 173   public boolean wasNonExternalizedStringLiteral = false;
 174   /* static */{
 175     for (int i = 0; i < 6; i++) {
 176       for (int j = 0; j < TableSize; j++) {
 177         for (int k = 0; k < InternalTableSize; k++) {
 178           charArray_length[i][j][k] = initCharArray;
 179         }
 180       }
 181     }
 182   }
 183
 184   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 185
 186   public static final int RoundBracket = 0;
 187
 188   public static final int SquareBracket = 1;
 189
 190   public static final int CurlyBracket = 2;
 191
 192   public static final int BracketKinds = 3;
 193
 194   // task tag support
 195   public char[][] foundTaskTags = null;
 196
 197   public char[][] foundTaskMessages;
 198
 199   public char[][] foundTaskPriorities = null;
 200
 201   public int[][] foundTaskPositions;
 202
 203   public int foundTaskCount = 0;
 204
 205   public char[][] taskTags = null;
 206
 207   public char[][] taskPriorities = null;
 208
 209   public boolean isTaskCaseSensitive = true;
 210
 211   public static final boolean DEBUG = false;
 212
 213   public static final boolean TRACE = false;
 214
 215   public ICompilationUnit compilationUnit = null;
 216
 217   /**
 218    * Determines if the specified character is permissible as the first character in a PHP identifier or variable
 219    *
 220    * The '$' character for PHP variables is regarded as a correct first character !
 221    *
 222    */
 223   public static boolean isPHPIdentOrVarStart(char ch) {
 224     return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 225   }
 226
 227   /**
 228    * Determines if the specified character is permissible as the first character in a PHP identifier.
 229    *
 230    * The '$' character for PHP variables isn't regarded as the first character !
 231    */
 232   public static boolean isPHPIdentifierStart(char ch) {
 233     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 234   }
 235
 236   /**
 237    * Determines if the specified character may be part of a PHP identifier as other than the first character
 238    */
 239   public static boolean isPHPIdentifierPart(char ch) {
 240     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 241   }
 242
 243   public final boolean atEnd() {
 244     // This code is not relevant if source is
 245     // Only a part of the real stream input
 246     return source.length == currentPosition;
 247   }
 248
 249   public char[] getCurrentIdentifierSource() {
 250     //return the token REAL source (aka unicodes are precomputed)
 251     char[] result;
 252     //    if (withoutUnicodePtr != 0)
 253     //      //0 is used as a fast test flag so the real first char is in position 1
 254     //      System.arraycopy(
 255     //        withoutUnicodeBuffer,
 256     //        1,
 257     //        result = new char[withoutUnicodePtr],
 258     //        0,
 259     //        withoutUnicodePtr);
 260     //    else {
 261     int length = currentPosition - startPosition;
 262     switch (length) { // see OptimizedLength
 263     case 1:
 264       return optimizedCurrentTokenSource1();
 265     case 2:
 266       return optimizedCurrentTokenSource2();
 267     case 3:
 268       return optimizedCurrentTokenSource3();
 269     case 4:
 270       return optimizedCurrentTokenSource4();
 271     case 5:
 272       return optimizedCurrentTokenSource5();
 273     case 6:
 274       return optimizedCurrentTokenSource6();
 275     }
 276     //no optimization
 277     System.arraycopy(source, startPosition, result = new char[length], 0, length);
 278     //   }
 279     return result;
 280   }
 281
 282   public int getCurrentTokenEndPosition() {
 283     return this.currentPosition - 1;
 284   }
 285
 286   public final char[] getCurrentTokenSource() {
 287     // Return the token REAL source (aka unicodes are precomputed)
 288     char[] result;
 289     //    if (withoutUnicodePtr != 0)
 290     //      // 0 is used as a fast test flag so the real first char is in position 1
 291     //      System.arraycopy(
 292     //        withoutUnicodeBuffer,
 293     //        1,
 294     //        result = new char[withoutUnicodePtr],
 295     //        0,
 296     //        withoutUnicodePtr);
 297     //    else {
 298     int length;
 299     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 300     //    }
 301     return result;
 302   }
 303
 304   public final char[] getCurrentTokenSource(int startPos) {
 305     // Return the token REAL source (aka unicodes are precomputed)
 306     char[] result;
 307     //    if (withoutUnicodePtr != 0)
 308     //      // 0 is used as a fast test flag so the real first char is in position 1
 309     //      System.arraycopy(
 310     //        withoutUnicodeBuffer,
 311     //        1,
 312     //        result = new char[withoutUnicodePtr],
 313     //        0,
 314     //        withoutUnicodePtr);
 315     //    else {
 316     int length;
 317     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 318     //  }
 319     return result;
 320   }
 321
 322   public final char[] getCurrentTokenSourceString() {
 323     //return the token REAL source (aka unicodes are precomputed).
 324     //REMOVE the two " that are at the beginning and the end.
 325     char[] result;
 326     if (withoutUnicodePtr != 0)
 327       //0 is used as a fast test flag so the real first char is in position 1
 328       System.arraycopy(withoutUnicodeBuffer, 2,
 329       //2 is 1 (real start) + 1 (to jump over the ")
 330           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 331     else {
 332       int length;
 333       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 334     }
 335     return result;
 336   }
 337
 338   public final char[] getRawTokenSourceEnd() {
 339     int length = this.eofPosition - this.currentPosition - 1;
 340     char[] sourceEnd = new char[length];
 341     System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
 342     return sourceEnd;
 343   }
 344
 345   public int getCurrentTokenStartPosition() {
 346     return this.startPosition;
 347   }
 348
 349   public final char[] getCurrentStringLiteralSource() {
 350     // Return the token REAL source (aka unicodes are precomputed)
 351     if (startPosition + 1 >= currentPosition) {
 352       return new char[0];
 353     }
 354     char[] result;
 355     int length;
 356     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 357     //    }
 358     return result;
 359   }
 360
 361   public final char[] getCurrentStringLiteralSource(int startPos) {
 362     // Return the token REAL source (aka unicodes are precomputed)
 363     char[] result;
 364     int length;
 365     System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 366     //    }
 367     return result;
 368   }
 369
 370   /*
 371    * Search the source position corresponding to the end of a given line number
 372    *
 373    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 374    *
 375    * In case the given line number is inconsistent, answers -1.
 376    */
 377   public final int getLineEnd(int lineNumber) {
 378     if (lineEnds == null)
 379       return -1;
 380     if (lineNumber >= lineEnds.length)
 381       return -1;
 382     if (lineNumber <= 0)
 383       return -1;
 384     if (lineNumber == lineEnds.length - 1)
 385       return eofPosition;
 386     return lineEnds[lineNumber - 1];
 387     // next line start one character behind the lineEnd of the previous line
 388   }
 389
 390   /**
 391    * Search the source position corresponding to the beginning of a given line number
 392    *
 393    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
 394    *
 395    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 396    *
 397    * In case the given line number is inconsistent, answers -1.
 398    */
 399   public final int getLineStart(int lineNumber) {
 400     if (lineEnds == null)
 401       return -1;
 402     if (lineNumber >= lineEnds.length)
 403       return -1;
 404     if (lineNumber <= 0)
 405       return -1;
 406     if (lineNumber == 1)
 407       return initialPosition;
 408     return lineEnds[lineNumber - 2] + 1;
 409     // next line start one character behind the lineEnd of the previous line
 410   }
 411
 412   public final boolean getNextChar(char testedChar) {
 413     //BOOLEAN
 414     //handle the case of unicode.
 415     //when a unicode appears then we must use a buffer that holds char
 416     // internal values
 417     //At the end of this method currentCharacter holds the new visited char
 418     //and currentPosition points right next after it
 419     //Both previous lines are true if the currentCharacter is == to the
 420     // testedChar
 421     //On false, no side effect has occured.
 422     //ALL getNextChar.... ARE OPTIMIZED COPIES
 423     int temp = currentPosition;
 424     try {
 425       currentCharacter = source[currentPosition++];
 426       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 427       //        && (source[currentPosition] == 'u')) {
 428       //        //-------------unicode traitement ------------
 429       //        int c1, c2, c3, c4;
 430       //        int unicodeSize = 6;
 431       //        currentPosition++;
 432       //        while (source[currentPosition] == 'u') {
 433       //          currentPosition++;
 434       //          unicodeSize++;
 435       //        }
 436       //
 437       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 438       //          || c1 < 0)
 439       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 440       //            || c2 < 0)
 441       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 442       //            || c3 < 0)
 443       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 444       //            || c4 < 0)) {
 445       //          currentPosition = temp;
 446       //          return false;
 447       //        }
 448       //
 449       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 450       //        if (currentCharacter != testedChar) {
 451       //          currentPosition = temp;
 452       //          return false;
 453       //        }
 454       //        unicodeAsBackSlash = currentCharacter == '\\';
 455       //
 456       //        //need the unicode buffer
 457       //        if (withoutUnicodePtr == 0) {
 458       //          //buffer all the entries that have been left aside....
 459       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 460       //          System.arraycopy(
 461       //            source,
 462       //            startPosition,
 463       //            withoutUnicodeBuffer,
 464       //            1,
 465       //            withoutUnicodePtr);
 466       //        }
 467       //        //fill the buffer with the char
 468       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 469       //        return true;
 470       //
 471       //      } //-------------end unicode traitement--------------
 472       //      else {
 473       if (currentCharacter != testedChar) {
 474         currentPosition = temp;
 475         return false;
 476       }
 477       unicodeAsBackSlash = false;
 478       //        if (withoutUnicodePtr != 0)
 479       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 480       return true;
 481       //      }
 482     } catch (IndexOutOfBoundsException e) {
 483       unicodeAsBackSlash = false;
 484       currentPosition = temp;
 485       return false;
 486     }
 487   }
 488
 489   public final int getNextChar(char testedChar1, char testedChar2) {
 490     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 491     //test can be done with (x==0) for the first and (x>0) for the second
 492     //handle the case of unicode.
 493     //when a unicode appears then we must use a buffer that holds char
 494     // internal values
 495     //At the end of this method currentCharacter holds the new visited char
 496     //and currentPosition points right next after it
 497     //Both previous lines are true if the currentCharacter is == to the
 498     // testedChar1/2
 499     //On false, no side effect has occured.
 500     //ALL getNextChar.... ARE OPTIMIZED COPIES
 501     int temp = currentPosition;
 502     try {
 503       int result;
 504       currentCharacter = source[currentPosition++];
 505       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 506       //        && (source[currentPosition] == 'u')) {
 507       //        //-------------unicode traitement ------------
 508       //        int c1, c2, c3, c4;
 509       //        int unicodeSize = 6;
 510       //        currentPosition++;
 511       //        while (source[currentPosition] == 'u') {
 512       //          currentPosition++;
 513       //          unicodeSize++;
 514       //        }
 515       //
 516       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 517       //          || c1 < 0)
 518       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 519       //            || c2 < 0)
 520       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 521       //            || c3 < 0)
 522       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 523       //            || c4 < 0)) {
 524       //          currentPosition = temp;
 525       //          return 2;
 526       //        }
 527       //
 528       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 529       //        if (currentCharacter == testedChar1)
 530       //          result = 0;
 531       //        else if (currentCharacter == testedChar2)
 532       //          result = 1;
 533       //        else {
 534       //          currentPosition = temp;
 535       //          return -1;
 536       //        }
 537       //
 538       //        //need the unicode buffer
 539       //        if (withoutUnicodePtr == 0) {
 540       //          //buffer all the entries that have been left aside....
 541       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 542       //          System.arraycopy(
 543       //            source,
 544       //            startPosition,
 545       //            withoutUnicodeBuffer,
 546       //            1,
 547       //            withoutUnicodePtr);
 548       //        }
 549       //        //fill the buffer with the char
 550       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 551       //        return result;
 552       //      } //-------------end unicode traitement--------------
 553       //      else {
 554       if (currentCharacter == testedChar1)
 555         result = 0;
 556       else if (currentCharacter == testedChar2)
 557         result = 1;
 558       else {
 559         currentPosition = temp;
 560         return -1;
 561       }
 562       //        if (withoutUnicodePtr != 0)
 563       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 564       return result;
 565       //     }
 566     } catch (IndexOutOfBoundsException e) {
 567       currentPosition = temp;
 568       return -1;
 569     }
 570   }
 571
 572   public final boolean getNextCharAsDigit() {
 573     //BOOLEAN
 574     //handle the case of unicode.
 575     //when a unicode appears then we must use a buffer that holds char
 576     // internal values
 577     //At the end of this method currentCharacter holds the new visited char
 578     //and currentPosition points right next after it
 579     //Both previous lines are true if the currentCharacter is a digit
 580     //On false, no side effect has occured.
 581     //ALL getNextChar.... ARE OPTIMIZED COPIES
 582     int temp = currentPosition;
 583     try {
 584       currentCharacter = source[currentPosition++];
 585       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 586       //        && (source[currentPosition] == 'u')) {
 587       //        //-------------unicode traitement ------------
 588       //        int c1, c2, c3, c4;
 589       //        int unicodeSize = 6;
 590       //        currentPosition++;
 591       //        while (source[currentPosition] == 'u') {
 592       //          currentPosition++;
 593       //          unicodeSize++;
 594       //        }
 595       //
 596       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 597       //          || c1 < 0)
 598       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 599       //            || c2 < 0)
 600       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 601       //            || c3 < 0)
 602       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 603       //            || c4 < 0)) {
 604       //          currentPosition = temp;
 605       //          return false;
 606       //        }
 607       //
 608       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 609       //        if (!Character.isDigit(currentCharacter)) {
 610       //          currentPosition = temp;
 611       //          return false;
 612       //        }
 613       //
 614       //        //need the unicode buffer
 615       //        if (withoutUnicodePtr == 0) {
 616       //          //buffer all the entries that have been left aside....
 617       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 618       //          System.arraycopy(
 619       //            source,
 620       //            startPosition,
 621       //            withoutUnicodeBuffer,
 622       //            1,
 623       //            withoutUnicodePtr);
 624       //        }
 625       //        //fill the buffer with the char
 626       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 627       //        return true;
 628       //      } //-------------end unicode traitement--------------
 629       //      else {
 630       if (!Character.isDigit(currentCharacter)) {
 631         currentPosition = temp;
 632         return false;
 633       }
 634       //        if (withoutUnicodePtr != 0)
 635       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 636       return true;
 637       //      }
 638     } catch (IndexOutOfBoundsException e) {
 639       currentPosition = temp;
 640       return false;
 641     }
 642   }
 643
 644   public final boolean getNextCharAsDigit(int radix) {
 645     //BOOLEAN
 646     //handle the case of unicode.
 647     //when a unicode appears then we must use a buffer that holds char
 648     // internal values
 649     //At the end of this method currentCharacter holds the new visited char
 650     //and currentPosition points right next after it
 651     //Both previous lines are true if the currentCharacter is a digit base on
 652     // radix
 653     //On false, no side effect has occured.
 654     //ALL getNextChar.... ARE OPTIMIZED COPIES
 655     int temp = currentPosition;
 656     try {
 657       currentCharacter = source[currentPosition++];
 658       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 659       //        && (source[currentPosition] == 'u')) {
 660       //        //-------------unicode traitement ------------
 661       //        int c1, c2, c3, c4;
 662       //        int unicodeSize = 6;
 663       //        currentPosition++;
 664       //        while (source[currentPosition] == 'u') {
 665       //          currentPosition++;
 666       //          unicodeSize++;
 667       //        }
 668       //
 669       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 670       //          || c1 < 0)
 671       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 672       //            || c2 < 0)
 673       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 674       //            || c3 < 0)
 675       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 676       //            || c4 < 0)) {
 677       //          currentPosition = temp;
 678       //          return false;
 679       //        }
 680       //
 681       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 682       //        if (Character.digit(currentCharacter, radix) == -1) {
 683       //          currentPosition = temp;
 684       //          return false;
 685       //        }
 686       //
 687       //        //need the unicode buffer
 688       //        if (withoutUnicodePtr == 0) {
 689       //          //buffer all the entries that have been left aside....
 690       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 691       //          System.arraycopy(
 692       //            source,
 693       //            startPosition,
 694       //            withoutUnicodeBuffer,
 695       //            1,
 696       //            withoutUnicodePtr);
 697       //        }
 698       //        //fill the buffer with the char
 699       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 700       //        return true;
 701       //      } //-------------end unicode traitement--------------
 702       //      else {
 703       if (Character.digit(currentCharacter, radix) == -1) {
 704         currentPosition = temp;
 705         return false;
 706       }
 707       //        if (withoutUnicodePtr != 0)
 708       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 709       return true;
 710       //      }
 711     } catch (IndexOutOfBoundsException e) {
 712       currentPosition = temp;
 713       return false;
 714     }
 715   }
 716
 717   public boolean getNextCharAsJavaIdentifierPart() {
 718     //BOOLEAN
 719     //handle the case of unicode.
 720     //when a unicode appears then we must use a buffer that holds char
 721     // internal values
 722     //At the end of this method currentCharacter holds the new visited char
 723     //and currentPosition points right next after it
 724     //Both previous lines are true if the currentCharacter is a
 725     // JavaIdentifierPart
 726     //On false, no side effect has occured.
 727     //ALL getNextChar.... ARE OPTIMIZED COPIES
 728     int temp = currentPosition;
 729     try {
 730       currentCharacter = source[currentPosition++];
 731       //      if (((currentCharacter = source[currentPosition++]) == '\\')
 732       //        && (source[currentPosition] == 'u')) {
 733       //        //-------------unicode traitement ------------
 734       //        int c1, c2, c3, c4;
 735       //        int unicodeSize = 6;
 736       //        currentPosition++;
 737       //        while (source[currentPosition] == 'u') {
 738       //          currentPosition++;
 739       //          unicodeSize++;
 740       //        }
 741       //
 742       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 743       //          || c1 < 0)
 744       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 745       //            || c2 < 0)
 746       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 747       //            || c3 < 0)
 748       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 749       //            || c4 < 0)) {
 750       //          currentPosition = temp;
 751       //          return false;
 752       //        }
 753       //
 754       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 755       //        if (!isPHPIdentifierPart(currentCharacter)) {
 756       //          currentPosition = temp;
 757       //          return false;
 758       //        }
 759       //
 760       //        //need the unicode buffer
 761       //        if (withoutUnicodePtr == 0) {
 762       //          //buffer all the entries that have been left aside....
 763       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 764       //          System.arraycopy(
 765       //            source,
 766       //            startPosition,
 767       //            withoutUnicodeBuffer,
 768       //            1,
 769       //            withoutUnicodePtr);
 770       //        }
 771       //        //fill the buffer with the char
 772       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 773       //        return true;
 774       //      } //-------------end unicode traitement--------------
 775       //      else {
 776       if (!isPHPIdentifierPart(currentCharacter)) {
 777         currentPosition = temp;
 778         return false;
 779       }
 780       //        if (withoutUnicodePtr != 0)
 781       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 782       return true;
 783       //      }
 784     } catch (IndexOutOfBoundsException e) {
 785       currentPosition = temp;
 786       return false;
 787     }
 788   }
 789
 790   public int getCastOrParen() {
 791     int tempPosition = currentPosition;
 792     char tempCharacter = currentCharacter;
 793     int tempToken = TokenNameLPAREN;
 794     boolean found = false;
 795     StringBuffer buf = new StringBuffer();
 796     try {
 797       do {
 798         currentCharacter = source[currentPosition++];
 799       } while (currentCharacter == ' ' || currentCharacter == '\t');
 800       while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 801         buf.append(currentCharacter);
 802         currentCharacter = source[currentPosition++];
 803       }
 804       if (buf.length() >= 3 && buf.length() <= 7) {
 805         char[] data = buf.toString().toCharArray();
 806         int index = 0;
 807         switch (data.length) {
 808         case 3:
 809           // int
 810           if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 811             found = true;
 812             tempToken = TokenNameintCAST;
 813           }
 814           break;
 815         case 4:
 816           // bool real
 817           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 818             found = true;
 819             tempToken = TokenNameboolCAST;
 820           } else {
 821             index = 0;
 822             if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 823               found = true;
 824               tempToken = TokenNamedoubleCAST;
 825             }
 826           }
 827           break;
 828         case 5:
 829           // array unset float
 830           if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 831               && (data[++index] == 'y')) {
 832             found = true;
 833             tempToken = TokenNamearrayCAST;
 834           } else {
 835             index = 0;
 836             if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 837                 && (data[++index] == 't')) {
 838               found = true;
 839               tempToken = TokenNameunsetCAST;
 840             } else {
 841               index = 0;
 842               if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 843                   && (data[++index] == 't')) {
 844                 found = true;
 845                 tempToken = TokenNamedoubleCAST;
 846               }
 847             }
 848           }
 849           break;
 850         case 6:
 851           // object string double
 852           if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 853               && (data[++index] == 'c') && (data[++index] == 't')) {
 854             found = true;
 855             tempToken = TokenNameobjectCAST;
 856           } else {
 857             index = 0;
 858             if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 859                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 860               found = true;
 861               tempToken = TokenNamestringCAST;
 862             } else {
 863               index = 0;
 864               if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 865                   && (data[++index] == 'l') && (data[++index] == 'e')) {
 866                 found = true;
 867                 tempToken = TokenNamedoubleCAST;
 868               }
 869             }
 870           }
 871           break;
 872         case 7:
 873           // boolean integer
 874           if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 875               && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 876             found = true;
 877             tempToken = TokenNameboolCAST;
 878           } else {
 879             index = 0;
 880             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 881                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 882               found = true;
 883               tempToken = TokenNameintCAST;
 884             }
 885           }
 886           break;
 887         }
 888         if (found) {
 889           while (currentCharacter == ' ' || currentCharacter == '\t') {
 890             currentCharacter = source[currentPosition++];
 891           }
 892           if (currentCharacter == ')') {
 893             return tempToken;
 894           }
 895         }
 896       }
 897     } catch (IndexOutOfBoundsException e) {
 898     }
 899     currentCharacter = tempCharacter;
 900     currentPosition = tempPosition;
 901     return TokenNameLPAREN;
 902   }
 903
 904   public void consumeStringInterpolated() throws InvalidInputException {
 905     try {
 906       // consume next character
 907       unicodeAsBackSlash = false;
 908       currentCharacter = source[currentPosition++];
 909       //                if (((currentCharacter = source[currentPosition++]) == '\\')
 910       //                  && (source[currentPosition] == 'u')) {
 911       //                  getNextUnicodeChar();
 912       //                } else {
 913       //                  if (withoutUnicodePtr != 0) {
 914       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 915       //                      currentCharacter;
 916       //                  }
 917       //                }
 918       while (currentCharacter != '`') {
 919         /** ** in PHP \r and \n are valid in string literals *** */
 920         //                if ((currentCharacter == '\n')
 921         //                  || (currentCharacter == '\r')) {
 922         //                  // relocate if finding another quote fairly close: thus unicode
 923         // '/u000D' will be fully consumed
 924         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 925         //                    if (currentPosition + lookAhead == source.length)
 926         //                      break;
 927         //                    if (source[currentPosition + lookAhead] == '\n')
 928         //                      break;
 929         //                    if (source[currentPosition + lookAhead] == '\"') {
 930         //                      currentPosition += lookAhead + 1;
 931         //                      break;
 932         //                    }
 933         //                  }
 934         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 935         //                }
 936         if (currentCharacter == '\\') {
 937           int escapeSize = currentPosition;
 938           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 939           //scanEscapeCharacter make a side effect on this value and we need
 940           // the previous value few lines down this one
 941           scanDoubleQuotedEscapeCharacter();
 942           escapeSize = currentPosition - escapeSize;
 943           if (withoutUnicodePtr == 0) {
 944             //buffer all the entries that have been left aside....
 945             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
 946             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
 947             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 948           } else { //overwrite the / in the buffer
 949             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
 950             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
 951               // where only one is correct
 952               withoutUnicodePtr--;
 953             }
 954           }
 955         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 956           if (recordLineSeparator) {
 957             pushLineSeparator();
 958           }
 959         }
 960         // consume next character
 961         unicodeAsBackSlash = false;
 962         currentCharacter = source[currentPosition++];
 963         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 964         //                    && (source[currentPosition] == 'u')) {
 965         //                    getNextUnicodeChar();
 966         //                  } else {
 967         if (withoutUnicodePtr != 0) {
 968           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 969         }
 970         //                  }
 971       }
 972     } catch (IndexOutOfBoundsException e) {
 973       //    reset end position for error reporting
 974       currentPosition -= 2;
 975       throw new InvalidInputException(UNTERMINATED_STRING);
 976     } catch (InvalidInputException e) {
 977       if (e.getMessage().equals(INVALID_ESCAPE)) {
 978         // relocate if finding another quote fairly close: thus unicode
 979         // '/u000D' will be fully consumed
 980         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 981           if (currentPosition + lookAhead == source.length)
 982             break;
 983           if (source[currentPosition + lookAhead] == '\n')
 984             break;
 985           if (source[currentPosition + lookAhead] == '`') {
 986             currentPosition += lookAhead + 1;
 987             break;
 988           }
 989         }
 990       }
 991       throw e; // rethrow
 992     }
 993     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
 994       // //$NON-NLS-?$ where ? is an
 995       // int.
 996       if (currentLine == null) {
 997         currentLine = new NLSLine();
 998         lines.add(currentLine);
 999       }
1000       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1001     }
1002   }
1003
1004   public void consumeStringConstant() throws InvalidInputException {
1005     try {
1006       // consume next character
1007       unicodeAsBackSlash = false;
1008       currentCharacter = source[currentPosition++];
1009       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1010       //                  && (source[currentPosition] == 'u')) {
1011       //                  getNextUnicodeChar();
1012       //                } else {
1013       //                  if (withoutUnicodePtr != 0) {
1014       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1015       //                      currentCharacter;
1016       //                  }
1017       //                }
1018       while (currentCharacter != '\'') {
1019         /** ** in PHP \r and \n are valid in string literals *** */
1020         //                  if ((currentCharacter == '\n')
1021         //                    || (currentCharacter == '\r')) {
1022         //                    // relocate if finding another quote fairly close: thus unicode
1023         // '/u000D' will be fully consumed
1024         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1025         //                      if (currentPosition + lookAhead == source.length)
1026         //                        break;
1027         //                      if (source[currentPosition + lookAhead] == '\n')
1028         //                        break;
1029         //                      if (source[currentPosition + lookAhead] == '\"') {
1030         //                        currentPosition += lookAhead + 1;
1031         //                        break;
1032         //                      }
1033         //                    }
1034         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1035         //                  }
1036         if (currentCharacter == '\\') {
1037           int escapeSize = currentPosition;
1038           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1039           //scanEscapeCharacter make a side effect on this value and we need
1040           // the previous value few lines down this one
1041           scanSingleQuotedEscapeCharacter();
1042           escapeSize = currentPosition - escapeSize;
1043           if (withoutUnicodePtr == 0) {
1044             //buffer all the entries that have been left aside....
1045             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1046             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1047             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1048           } else { //overwrite the / in the buffer
1049             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1050             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1051               // where only one is correct
1052               withoutUnicodePtr--;
1053             }
1054           }
1055         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1056           if (recordLineSeparator) {
1057             pushLineSeparator();
1058           }
1059         }
1060         // consume next character
1061         unicodeAsBackSlash = false;
1062         currentCharacter = source[currentPosition++];
1063         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1064         //                    && (source[currentPosition] == 'u')) {
1065         //                    getNextUnicodeChar();
1066         //                  } else {
1067         if (withoutUnicodePtr != 0) {
1068           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1069         }
1070         //                  }
1071       }
1072     } catch (IndexOutOfBoundsException e) {
1073       // reset end position for error reporting
1074       currentPosition -= 2;
1075       throw new InvalidInputException(UNTERMINATED_STRING);
1076     } catch (InvalidInputException e) {
1077       if (e.getMessage().equals(INVALID_ESCAPE)) {
1078         // relocate if finding another quote fairly close: thus unicode
1079         // '/u000D' will be fully consumed
1080         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1081           if (currentPosition + lookAhead == source.length)
1082             break;
1083           if (source[currentPosition + lookAhead] == '\n')
1084             break;
1085           if (source[currentPosition + lookAhead] == '\'') {
1086             currentPosition += lookAhead + 1;
1087             break;
1088           }
1089         }
1090       }
1091       throw e; // rethrow
1092     }
1093     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1094       // //$NON-NLS-?$ where ? is an
1095       // int.
1096       if (currentLine == null) {
1097         currentLine = new NLSLine();
1098         lines.add(currentLine);
1099       }
1100       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1101     }
1102   }
1103
1104   public void consumeStringLiteral() throws InvalidInputException {
1105     try {
1106       boolean openDollarBrace = false;
1107       // consume next character
1108       unicodeAsBackSlash = false;
1109       currentCharacter = source[currentPosition++];
1110       while (currentCharacter != '"' || openDollarBrace) {
1111         /** ** in PHP \r and \n are valid in string literals *** */
1112         if (currentCharacter == '\\') {
1113           int escapeSize = currentPosition;
1114           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1115           //scanEscapeCharacter make a side effect on this value and we need
1116           // the previous value few lines down this one
1117           scanDoubleQuotedEscapeCharacter();
1118           escapeSize = currentPosition - escapeSize;
1119           if (withoutUnicodePtr == 0) {
1120             //buffer all the entries that have been left aside....
1121             withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1122             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1123             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1124           } else { //overwrite the / in the buffer
1125             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1126             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1127               // where only one is correct
1128               withoutUnicodePtr--;
1129             }
1130           }
1131         } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1132           openDollarBrace = true;
1133         } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1134           openDollarBrace = true;
1135         } else if (currentCharacter == '}') {
1136           openDollarBrace = false;
1137         } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1138           if (recordLineSeparator) {
1139             pushLineSeparator();
1140           }
1141         }
1142         // consume next character
1143         unicodeAsBackSlash = false;
1144         currentCharacter = source[currentPosition++];
1145         if (withoutUnicodePtr != 0) {
1146           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1147         }
1148       }
1149     } catch (IndexOutOfBoundsException e) {
1150       //    reset end position for error reporting
1151       currentPosition -= 2;
1152       throw new InvalidInputException(UNTERMINATED_STRING);
1153     } catch (InvalidInputException e) {
1154       if (e.getMessage().equals(INVALID_ESCAPE)) {
1155         // relocate if finding another quote fairly close: thus unicode
1156         // '/u000D' will be fully consumed
1157         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1158           if (currentPosition + lookAhead == source.length)
1159             break;
1160           if (source[currentPosition + lookAhead] == '\n')
1161             break;
1162           if (source[currentPosition + lookAhead] == '\"') {
1163             currentPosition += lookAhead + 1;
1164             break;
1165           }
1166         }
1167       }
1168       throw e; // rethrow
1169     }
1170     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1171       // //$NON-NLS-?$ where ? is an
1172       // int.
1173       if (currentLine == null) {
1174         currentLine = new NLSLine();
1175         lines.add(currentLine);
1176       }
1177       currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1178     }
1179   }
1180
1181   public int getNextToken() throws InvalidInputException {
1182     phpExpressionTag = false;
1183     if (!phpMode) {
1184       return getInlinedHTMLToken(currentPosition);
1185     }
1186     if (phpMode) {
1187       this.wasAcr = false;
1188       if (diet) {
1189         jumpOverMethodBody();
1190         diet = false;
1191         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1192       }
1193       try {
1194         while (true) {
1195           withoutUnicodePtr = 0;
1196           //start with a new token
1197           char encapsedChar = ' ';
1198           //          if (!encapsedStringStack.isEmpty()) {
1199           //            encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1200           //          }
1201           //          if (encapsedChar != '$' && encapsedChar != ' ') {
1202           //            currentCharacter = source[currentPosition++];
1203           //            if (currentCharacter == encapsedChar) {
1204           //              switch (currentCharacter) {
1205           //              case '`':
1206           //                return TokenNameEncapsedString0;
1207           //              case '\'':
1208           //                return TokenNameEncapsedString1;
1209           //              case '"':
1210           //                return TokenNameEncapsedString2;
1211           //              }
1212           //            }
1213           //            while (currentCharacter != encapsedChar) {
1214           //              /** ** in PHP \r and \n are valid in string literals *** */
1215           //              switch (currentCharacter) {
1216           //              case '\\':
1217           //                int escapeSize = currentPosition;
1218           //                boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1219           //                //scanEscapeCharacter make a side effect on this value and
1220           //                // we need the previous value few lines down this one
1221           //                scanDoubleQuotedEscapeCharacter();
1222           //                escapeSize = currentPosition - escapeSize;
1223           //                if (withoutUnicodePtr == 0) {
1224           //                  //buffer all the entries that have been left aside....
1225           //                  withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1226           //                  System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1227           //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1228           //                } else { //overwrite the / in the buffer
1229           //                  withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1230           //                  if (backSlashAsUnicodeInString) { //there are TWO \ in
1231           //                    withoutUnicodePtr--;
1232           //                  }
1233           //                }
1234           //                break;
1235           //              case '\r':
1236           //              case '\n':
1237           //                if (recordLineSeparator) {
1238           //                  pushLineSeparator();
1239           //                }
1240           //                break;
1241           //              case '$':
1242           //                if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1243           //                  currentPosition--;
1244           //                  encapsedStringStack.push(new Character('$'));
1245           //                  return TokenNameSTRING;
1246           //                }
1247           //                break;
1248           //              case '{':
1249           //                if (source[currentPosition] == '$') { // CURLY_OPEN
1250           //                  currentPosition--;
1251           //                  encapsedStringStack.push(new Character('$'));
1252           //                  return TokenNameSTRING;
1253           //                }
1254           //              }
1255           //              // consume next character
1256           //              unicodeAsBackSlash = false;
1257           //              currentCharacter = source[currentPosition++];
1258           //              if (withoutUnicodePtr != 0) {
1259           //                withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1260           //              }
1261           //              // }
1262           //            } // end while
1263           //            currentPosition--;
1264           //            return TokenNameSTRING;
1265           //          }
1266           // ---------Consume white space and handles startPosition---------
1267           int whiteStart = currentPosition;
1268           startPosition = currentPosition;
1269           currentCharacter = source[currentPosition++];
1270           //          if (encapsedChar == '$') {
1271           //            switch (currentCharacter) {
1272           //            case '\\':
1273           //              currentCharacter = source[currentPosition++];
1274           //              return TokenNameSTRING;
1275           //            case '{':
1276           //              if (encapsedChar == '$') {
1277           //                if (getNextChar('$'))
1278           //                  return TokenNameLBRACE_DOLLAR;
1279           //              }
1280           //              return TokenNameLBRACE;
1281           //            case '}':
1282           //              return TokenNameRBRACE;
1283           //            case '[':
1284           //              return TokenNameLBRACKET;
1285           //            case ']':
1286           //              return TokenNameRBRACKET;
1287           //            case '\'':
1288           //              if (tokenizeStrings) {
1289           //                consumeStringConstant();
1290           //                return TokenNameStringSingleQuote;
1291           //              }
1292           //              return TokenNameEncapsedString1;
1293           //            case '"':
1294           //              return TokenNameEncapsedString2;
1295           //            case '`':
1296           //              if (tokenizeStrings) {
1297           //                consumeStringInterpolated();
1298           //                return TokenNameStringInterpolated;
1299           //              }
1300           //              return TokenNameEncapsedString0;
1301           //            case '-':
1302           //              if (getNextChar('>'))
1303           //                return TokenNameMINUS_GREATER;
1304           //              return TokenNameSTRING;
1305           //            default:
1306           //              if (currentCharacter == '$') {
1307           //                int oldPosition = currentPosition;
1308           //                try {
1309           //                  currentCharacter = source[currentPosition++];
1310           //                  if (currentCharacter == '{') {
1311           //                    return TokenNameDOLLAR_LBRACE;
1312           //                  }
1313           //                  if (isPHPIdentifierStart(currentCharacter)) {
1314           //                    return scanIdentifierOrKeyword(true);
1315           //                  } else {
1316           //                    currentPosition = oldPosition;
1317           //                    return TokenNameSTRING;
1318           //                  }
1319           //                } catch (IndexOutOfBoundsException e) {
1320           //                  currentPosition = oldPosition;
1321           //                  return TokenNameSTRING;
1322           //                }
1323           //              }
1324           //              if (isPHPIdentifierStart(currentCharacter))
1325           //                return scanIdentifierOrKeyword(false);
1326           //              if (Character.isDigit(currentCharacter))
1327           //                return scanNumber(false);
1328           //              return TokenNameERROR;
1329           //            }
1330           //          }
1331           //          boolean isWhiteSpace;
1332
1333           while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1334             startPosition = currentPosition;
1335             currentCharacter = source[currentPosition++];
1336             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1337             //              && (source[currentPosition] == 'u')) {
1338             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1339             //            } else {
1340             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1341               checkNonExternalizeString();
1342               if (recordLineSeparator) {
1343                 pushLineSeparator();
1344               } else {
1345                 currentLine = null;
1346               }
1347             }
1348             //            isWhiteSpace = (currentCharacter == ' ')
1349             //                || Character.isWhitespace(currentCharacter);
1350             //            }
1351           }
1352           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1353             // reposition scanner in case we are interested by spaces as tokens
1354             currentPosition--;
1355             startPosition = whiteStart;
1356             return TokenNameWHITESPACE;
1357           }
1358           //little trick to get out in the middle of a source compuation
1359           if (currentPosition > eofPosition)
1360             return TokenNameEOF;
1361           // ---------Identify the next token-------------
1362           switch (currentCharacter) {
1363           case '(':
1364             return getCastOrParen();
1365           case ')':
1366             return TokenNameRPAREN;
1367           case '{':
1368             return TokenNameLBRACE;
1369           case '}':
1370             return TokenNameRBRACE;
1371           case '[':
1372             return TokenNameLBRACKET;
1373           case ']':
1374             return TokenNameRBRACKET;
1375           case ';':
1376             return TokenNameSEMICOLON;
1377           case ',':
1378             return TokenNameCOMMA;
1379           case '.':
1380             if (getNextChar('='))
1381               return TokenNameDOT_EQUAL;
1382             if (getNextCharAsDigit())
1383               return scanNumber(true);
1384             return TokenNameDOT;
1385           case '+': {
1386             int test;
1387             if ((test = getNextChar('+', '=')) == 0)
1388               return TokenNamePLUS_PLUS;
1389             if (test > 0)
1390               return TokenNamePLUS_EQUAL;
1391             return TokenNamePLUS;
1392           }
1393           case '-': {
1394             int test;
1395             if ((test = getNextChar('-', '=')) == 0)
1396               return TokenNameMINUS_MINUS;
1397             if (test > 0)
1398               return TokenNameMINUS_EQUAL;
1399             if (getNextChar('>'))
1400               return TokenNameMINUS_GREATER;
1401             return TokenNameMINUS;
1402           }
1403           case '~':
1404             if (getNextChar('='))
1405               return TokenNameTWIDDLE_EQUAL;
1406             return TokenNameTWIDDLE;
1407           case '!':
1408             if (getNextChar('=')) {
1409               if (getNextChar('=')) {
1410                 return TokenNameNOT_EQUAL_EQUAL;
1411               }
1412               return TokenNameNOT_EQUAL;
1413             }
1414             return TokenNameNOT;
1415           case '*':
1416             if (getNextChar('='))
1417               return TokenNameMULTIPLY_EQUAL;
1418             return TokenNameMULTIPLY;
1419           case '%':
1420             if (getNextChar('='))
1421               return TokenNameREMAINDER_EQUAL;
1422             return TokenNameREMAINDER;
1423           case '<': {
1424             int oldPosition = currentPosition;
1425             try {
1426               currentCharacter = source[currentPosition++];
1427             } catch (IndexOutOfBoundsException e) {
1428               currentPosition = oldPosition;
1429               return TokenNameLESS;
1430             }
1431             switch (currentCharacter) {
1432             case '=':
1433               return TokenNameLESS_EQUAL;
1434             case '>':
1435               return TokenNameNOT_EQUAL;
1436             case '<':
1437               if (getNextChar('='))
1438                 return TokenNameLEFT_SHIFT_EQUAL;
1439               if (getNextChar('<')) {
1440                 currentCharacter = source[currentPosition++];
1441                 while (Character.isWhitespace(currentCharacter)) {
1442                   currentCharacter = source[currentPosition++];
1443                 }
1444                 int heredocStart = currentPosition - 1;
1445                 int heredocLength = 0;
1446                 if (isPHPIdentifierStart(currentCharacter)) {
1447                   currentCharacter = source[currentPosition++];
1448                 } else {
1449                   return TokenNameERROR;
1450                 }
1451                 while (isPHPIdentifierPart(currentCharacter)) {
1452                   currentCharacter = source[currentPosition++];
1453                 }
1454                 heredocLength = currentPosition - heredocStart - 1;
1455                 // heredoc end-tag determination
1456                 boolean endTag = true;
1457                 char ch;
1458                 do {
1459                   ch = source[currentPosition++];
1460                   if (ch == '\r' || ch == '\n') {
1461                     if (recordLineSeparator) {
1462                       pushLineSeparator();
1463                     } else {
1464                       currentLine = null;
1465                     }
1466                     for (int i = 0; i < heredocLength; i++) {
1467                       if (source[currentPosition + i] != source[heredocStart + i]) {
1468                         endTag = false;
1469                         break;
1470                       }
1471                     }
1472                     if (endTag) {
1473                       currentPosition += heredocLength - 1;
1474                       currentCharacter = source[currentPosition++];
1475                       break; // do...while loop
1476                     } else {
1477                       endTag = true;
1478                     }
1479                   }
1480                 } while (true);
1481                 return TokenNameHEREDOC;
1482               }
1483               return TokenNameLEFT_SHIFT;
1484             }
1485             currentPosition = oldPosition;
1486             return TokenNameLESS;
1487           }
1488           case '>': {
1489             int test;
1490             if ((test = getNextChar('=', '>')) == 0)
1491               return TokenNameGREATER_EQUAL;
1492             if (test > 0) {
1493               if ((test = getNextChar('=', '>')) == 0)
1494                 return TokenNameRIGHT_SHIFT_EQUAL;
1495               return TokenNameRIGHT_SHIFT;
1496             }
1497             return TokenNameGREATER;
1498           }
1499           case '=':
1500             if (getNextChar('=')) {
1501               if (getNextChar('=')) {
1502                 return TokenNameEQUAL_EQUAL_EQUAL;
1503               }
1504               return TokenNameEQUAL_EQUAL;
1505             }
1506             if (getNextChar('>'))
1507               return TokenNameEQUAL_GREATER;
1508             return TokenNameEQUAL;
1509           case '&': {
1510             int test;
1511             if ((test = getNextChar('&', '=')) == 0)
1512               return TokenNameAND_AND;
1513             if (test > 0)
1514               return TokenNameAND_EQUAL;
1515             return TokenNameAND;
1516           }
1517           case '|': {
1518             int test;
1519             if ((test = getNextChar('|', '=')) == 0)
1520               return TokenNameOR_OR;
1521             if (test > 0)
1522               return TokenNameOR_EQUAL;
1523             return TokenNameOR;
1524           }
1525           case '^':
1526             if (getNextChar('='))
1527               return TokenNameXOR_EQUAL;
1528             return TokenNameXOR;
1529           case '?':
1530             if (getNextChar('>')) {
1531               phpMode = false;
1532               if (currentPosition == source.length) {
1533                 phpMode = true;
1534                 return TokenNameINLINE_HTML;
1535               }
1536               return getInlinedHTMLToken(currentPosition - 2);
1537             }
1538             return TokenNameQUESTION;
1539           case ':':
1540             if (getNextChar(':'))
1541               return TokenNamePAAMAYIM_NEKUDOTAYIM;
1542             return TokenNameCOLON;
1543           case '@':
1544             return TokenNameAT;
1545           case '\'':
1546             consumeStringConstant();
1547             return TokenNameStringSingleQuote;
1548           case '"':
1549 //            if (tokenizeStrings) {
1550               consumeStringLiteral();
1551               return TokenNameStringDoubleQuote;
1552 //            }
1553 //            return TokenNameEncapsedString2;
1554           case '`':
1555 //            if (tokenizeStrings) {
1556               consumeStringInterpolated();
1557               return TokenNameStringInterpolated;
1558 //            }
1559 //            return TokenNameEncapsedString0;
1560           case '#':
1561           case '/': {
1562             char startChar = currentCharacter;
1563             if (getNextChar('=') && startChar == '/') {
1564               return TokenNameDIVIDE_EQUAL;
1565             }
1566             int test;
1567             if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1568               //line comment
1569               this.lastCommentLinePosition = this.currentPosition;
1570               int endPositionForLineComment = 0;
1571               try { //get the next char
1572                 currentCharacter = source[currentPosition++];
1573                 //                    if (((currentCharacter = source[currentPosition++])
1574                 //                      == '\\')
1575                 //                      && (source[currentPosition] == 'u')) {
1576                 //                      //-------------unicode traitement ------------
1577                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1578                 //                      currentPosition++;
1579                 //                      while (source[currentPosition] == 'u') {
1580                 //                        currentPosition++;
1581                 //                      }
1582                 //                      if ((c1 =
1583                 //                        Character.getNumericValue(source[currentPosition++]))
1584                 //                        > 15
1585                 //                        || c1 < 0
1586                 //                        || (c2 =
1587                 //                          Character.getNumericValue(source[currentPosition++]))
1588                 //                          > 15
1589                 //                        || c2 < 0
1590                 //                        || (c3 =
1591                 //                          Character.getNumericValue(source[currentPosition++]))
1592                 //                          > 15
1593                 //                        || c3 < 0
1594                 //                        || (c4 =
1595                 //                          Character.getNumericValue(source[currentPosition++]))
1596                 //                          > 15
1597                 //                        || c4 < 0) {
1598                 //                        throw new
1599                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1600                 //                      } else {
1601                 //                        currentCharacter =
1602                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1603                 //                      }
1604                 //                    }
1605                 //handle the \\u case manually into comment
1606                 //                    if (currentCharacter == '\\') {
1607                 //                      if (source[currentPosition] == '\\')
1608                 //                        currentPosition++;
1609                 //                    } //jump over the \\
1610                 boolean isUnicode = false;
1611                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1612                   this.lastCommentLinePosition = this.currentPosition;
1613                   if (currentCharacter == '?') {
1614                     if (getNextChar('>')) {
1615                       // ?> breaks line comments
1616                       startPosition = currentPosition - 2;
1617                       phpMode = false;
1618                       return TokenNameINLINE_HTML;
1619                     }
1620                   }
1621                   //get the next char
1622                   isUnicode = false;
1623                   currentCharacter = source[currentPosition++];
1624                   //                      if (((currentCharacter = source[currentPosition++])
1625                   //                        == '\\')
1626                   //                        && (source[currentPosition] == 'u')) {
1627                   //                        isUnicode = true;
1628                   //                        //-------------unicode traitement ------------
1629                   //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1630                   //                        currentPosition++;
1631                   //                        while (source[currentPosition] == 'u') {
1632                   //                          currentPosition++;
1633                   //                        }
1634                   //                        if ((c1 =
1635                   //                          Character.getNumericValue(source[currentPosition++]))
1636                   //                          > 15
1637                   //                          || c1 < 0
1638                   //                          || (c2 =
1639                   //                            Character.getNumericValue(
1640                   //                              source[currentPosition++]))
1641                   //                            > 15
1642                   //                          || c2 < 0
1643                   //                          || (c3 =
1644                   //                            Character.getNumericValue(
1645                   //                              source[currentPosition++]))
1646                   //                            > 15
1647                   //                          || c3 < 0
1648                   //                          || (c4 =
1649                   //                            Character.getNumericValue(
1650                   //                              source[currentPosition++]))
1651                   //                            > 15
1652                   //                          || c4 < 0) {
1653                   //                          throw new
1654                   // InvalidInputException(INVALID_UNICODE_ESCAPE);
1655                   //                        } else {
1656                   //                          currentCharacter =
1657                   //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1658                   //                        }
1659                   //                      }
1660                   //handle the \\u case manually into comment
1661                   //                      if (currentCharacter == '\\') {
1662                   //                        if (source[currentPosition] == '\\')
1663                   //                          currentPosition++;
1664                   //                      } //jump over the \\
1665                 }
1666                 if (isUnicode) {
1667                   endPositionForLineComment = currentPosition - 6;
1668                 } else {
1669                   endPositionForLineComment = currentPosition - 1;
1670                 }
1671                 //                    recordComment(false);
1672                 recordComment(TokenNameCOMMENT_LINE);
1673                 if (this.taskTags != null)
1674                   checkTaskTag(this.startPosition, this.currentPosition);
1675                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1676                   checkNonExternalizeString();
1677                   if (recordLineSeparator) {
1678                     if (isUnicode) {
1679                       pushUnicodeLineSeparator();
1680                     } else {
1681                       pushLineSeparator();
1682                     }
1683                   } else {
1684                     currentLine = null;
1685                   }
1686                 }
1687                 if (tokenizeComments) {
1688                   if (!isUnicode) {
1689                     currentPosition = endPositionForLineComment;
1690                     // reset one character behind
1691                   }
1692                   return TokenNameCOMMENT_LINE;
1693                 }
1694               } catch (IndexOutOfBoundsException e) { //an eof will them
1695                 // be generated
1696                 if (tokenizeComments) {
1697                   currentPosition--;
1698                   // reset one character behind
1699                   return TokenNameCOMMENT_LINE;
1700                 }
1701               }
1702               break;
1703             }
1704             if (test > 0) {
1705               //traditional and annotation comment
1706               boolean isJavadoc = false, star = false;
1707               // consume next character
1708               unicodeAsBackSlash = false;
1709               currentCharacter = source[currentPosition++];
1710               //                  if (((currentCharacter = source[currentPosition++]) ==
1711               // '\\')
1712               //                    && (source[currentPosition] == 'u')) {
1713               //                    getNextUnicodeChar();
1714               //                  } else {
1715               //                    if (withoutUnicodePtr != 0) {
1716               //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1717               //                        currentCharacter;
1718               //                    }
1719               //                  }
1720               if (currentCharacter == '*') {
1721                 isJavadoc = true;
1722                 star = true;
1723               }
1724               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1725                 checkNonExternalizeString();
1726                 if (recordLineSeparator) {
1727                   pushLineSeparator();
1728                 } else {
1729                   currentLine = null;
1730                 }
1731               }
1732               try { //get the next char
1733                 currentCharacter = source[currentPosition++];
1734                 //                    if (((currentCharacter = source[currentPosition++])
1735                 //                      == '\\')
1736                 //                      && (source[currentPosition] == 'u')) {
1737                 //                      //-------------unicode traitement ------------
1738                 //                      getNextUnicodeChar();
1739                 //                    }
1740                 //handle the \\u case manually into comment
1741                 //                    if (currentCharacter == '\\') {
1742                 //                      if (source[currentPosition] == '\\')
1743                 //                        currentPosition++;
1744                 //                      //jump over the \\
1745                 //                    }
1746                 // empty comment is not a javadoc /**/
1747                 if (currentCharacter == '/') {
1748                   isJavadoc = false;
1749                 }
1750                 //loop until end of comment */
1751                 while ((currentCharacter != '/') || (!star)) {
1752                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1753                     checkNonExternalizeString();
1754                     if (recordLineSeparator) {
1755                       pushLineSeparator();
1756                     } else {
1757                       currentLine = null;
1758                     }
1759                   }
1760                   star = currentCharacter == '*';
1761                   //get next char
1762                   currentCharacter = source[currentPosition++];
1763                   //                      if (((currentCharacter = source[currentPosition++])
1764                   //                        == '\\')
1765                   //                        && (source[currentPosition] == 'u')) {
1766                   //                        //-------------unicode traitement ------------
1767                   //                        getNextUnicodeChar();
1768                   //                      }
1769                   //handle the \\u case manually into comment
1770                   //                      if (currentCharacter == '\\') {
1771                   //                        if (source[currentPosition] == '\\')
1772                   //                          currentPosition++;
1773                   //                      } //jump over the \\
1774                 }
1775                 //recordComment(isJavadoc);
1776                 if (isJavadoc) {
1777                   recordComment(TokenNameCOMMENT_PHPDOC);
1778                 } else {
1779                   recordComment(TokenNameCOMMENT_BLOCK);
1780                 }
1781
1782                 if (tokenizeComments) {
1783                   if (isJavadoc)
1784                     return TokenNameCOMMENT_PHPDOC;
1785                   return TokenNameCOMMENT_BLOCK;
1786                 }
1787
1788                 if (this.taskTags != null) {
1789                   checkTaskTag(this.startPosition, this.currentPosition);
1790                 }
1791               } catch (IndexOutOfBoundsException e) {
1792                 //                  reset end position for error reporting
1793                 currentPosition -= 2;
1794                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1795               }
1796               break;
1797             }
1798             return TokenNameDIVIDE;
1799           }
1800           case '\u001a':
1801             if (atEnd())
1802               return TokenNameEOF;
1803             //the atEnd may not be <currentPosition == source.length> if
1804             // source is only some part of a real (external) stream
1805             throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1806           default:
1807             if (currentCharacter == '$') {
1808               int oldPosition = currentPosition;
1809               try {
1810                 currentCharacter = source[currentPosition++];
1811                 if (isPHPIdentifierStart(currentCharacter)) {
1812                   return scanIdentifierOrKeyword(true);
1813                 } else {
1814                   currentPosition = oldPosition;
1815                   return TokenNameDOLLAR;
1816                 }
1817               } catch (IndexOutOfBoundsException e) {
1818                 currentPosition = oldPosition;
1819                 return TokenNameDOLLAR;
1820               }
1821             }
1822             if (isPHPIdentifierStart(currentCharacter))
1823               return scanIdentifierOrKeyword(false);
1824             if (Character.isDigit(currentCharacter))
1825               return scanNumber(false);
1826             return TokenNameERROR;
1827           }
1828         }
1829       } //-----------------end switch while try--------------------
1830       catch (IndexOutOfBoundsException e) {
1831       }
1832     }
1833     return TokenNameEOF;
1834   }
1835
1836   /**
1837    * @return
1838    * @throws InvalidInputException
1839    */
1840   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1841     if (currentPosition > source.length) {
1842       currentPosition = source.length;
1843       return TokenNameEOF;
1844     }
1845     startPosition = start;
1846     try {
1847       while (!phpMode) {
1848         currentCharacter = source[currentPosition++];
1849         if (currentCharacter == '<') {
1850           if (getNextChar('?')) {
1851             currentCharacter = source[currentPosition++];
1852             if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1853               if (currentCharacter != '=') { // <?=
1854                 currentPosition--;
1855               } else {
1856                 phpExpressionTag = true;
1857               }
1858               // <?
1859               if (ignorePHPOneLiner) { // for CodeFormatter
1860                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1861                   phpMode = true;
1862                   return TokenNameINLINE_HTML;
1863                 }
1864               } else {
1865                 phpMode = true;
1866                 return TokenNameINLINE_HTML;
1867               }
1868             } else {
1869               //              boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1870               //              if (phpStart) {
1871               int test = getNextChar('H', 'h');
1872               if (test >= 0) {
1873                 test = getNextChar('P', 'p');
1874                 if (test >= 0) {
1875                   // <?PHP <?php
1876                   if (ignorePHPOneLiner) {
1877                     if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1878                       phpMode = true;
1879                       return TokenNameINLINE_HTML;
1880                     }
1881                   } else {
1882                     phpMode = true;
1883                     return TokenNameINLINE_HTML;
1884                   }
1885                 }
1886               }
1887               //              }
1888             }
1889           }
1890         }
1891         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1892           if (recordLineSeparator) {
1893             pushLineSeparator();
1894           } else {
1895             currentLine = null;
1896           }
1897         }
1898       } //-----------------while--------------------
1899       phpMode = true;
1900       return TokenNameINLINE_HTML;
1901     } //-----------------try--------------------
1902     catch (IndexOutOfBoundsException e) {
1903       startPosition = start;
1904       currentPosition--;
1905     }
1906     phpMode = true;
1907     return TokenNameINLINE_HTML;
1908   }
1909
1910   /**
1911    * @return
1912    */
1913   private int lookAheadLinePHPTag() {
1914     // check if the PHP is only in this line (for CodeFormatter)
1915     int currentPositionInLine = currentPosition;
1916     char previousCharInLine = ' ';
1917     char currentCharInLine = ' ';
1918     boolean singleQuotedStringActive = false;
1919     boolean doubleQuotedStringActive = false;
1920
1921     try {
1922       // look ahead in this line
1923       while (true) {
1924         previousCharInLine = currentCharInLine;
1925         currentCharInLine = source[currentPositionInLine++];
1926         switch (currentCharInLine) {
1927         case '>':
1928           if (previousCharInLine == '?') {
1929             // update the scanner's current Position in the source
1930             currentPosition = currentPositionInLine;
1931             // use as "dummy" token
1932             return TokenNameEOF;
1933           }
1934           break;
1935         case '\\':
1936           if (doubleQuotedStringActive) {
1937             // ignore escaped characters in double quoted strings
1938             previousCharInLine = currentCharInLine;
1939             currentCharInLine = source[currentPositionInLine++];
1940           }
1941         case '\"':
1942           if (doubleQuotedStringActive) {
1943             doubleQuotedStringActive = false;
1944           } else {
1945             if (!singleQuotedStringActive) {
1946               doubleQuotedStringActive = true;
1947             }
1948           }
1949           break;
1950         case '\'':
1951           if (singleQuotedStringActive) {
1952             if (previousCharInLine != '\\') {
1953               singleQuotedStringActive = false;
1954             }
1955           } else {
1956             if (!doubleQuotedStringActive) {
1957               singleQuotedStringActive = true;
1958             }
1959           }
1960           break;
1961         case '\n':
1962           phpMode = true;
1963           return TokenNameINLINE_HTML;
1964         case '#':
1965           if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1966             phpMode = true;
1967             return TokenNameINLINE_HTML;
1968           }
1969           break;
1970         case '/':
1971           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1972             phpMode = true;
1973             return TokenNameINLINE_HTML;
1974           }
1975           break;
1976         case '*':
1977           if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1978             phpMode = true;
1979             return TokenNameINLINE_HTML;
1980           }
1981           break;
1982         }
1983       }
1984     } catch (IndexOutOfBoundsException e) {
1985       phpMode = true;
1986       currentPosition = currentPositionInLine;
1987       return TokenNameINLINE_HTML;
1988     }
1989   }
1990
1991   //  public final void getNextUnicodeChar()
1992   //    throws IndexOutOfBoundsException, InvalidInputException {
1993   //    //VOID
1994   //    //handle the case of unicode.
1995   //    //when a unicode appears then we must use a buffer that holds char
1996   // internal values
1997   //    //At the end of this method currentCharacter holds the new visited char
1998   //    //and currentPosition points right next after it
1999   //
2000   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
2001   //
2002   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2003   //    currentPosition++;
2004   //    while (source[currentPosition] == 'u') {
2005   //      currentPosition++;
2006   //      unicodeSize++;
2007   //    }
2008   //
2009   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2010   //      || c1 < 0
2011   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2012   //      || c2 < 0
2013   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2014   //      || c3 < 0
2015   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2016   //      || c4 < 0) {
2017   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2018   //    } else {
2019   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2020   //      //need the unicode buffer
2021   //      if (withoutUnicodePtr == 0) {
2022   //        //buffer all the entries that have been left aside....
2023   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2024   //        System.arraycopy(
2025   //          source,
2026   //          startPosition,
2027   //          withoutUnicodeBuffer,
2028   //          1,
2029   //          withoutUnicodePtr);
2030   //      }
2031   //      //fill the buffer with the char
2032   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2033   //    }
2034   //    unicodeAsBackSlash = currentCharacter == '\\';
2035   //  }
2036   /*
2037    * Tokenize a method body, assuming that curly brackets are properly balanced.
2038    */
2039   public final void jumpOverMethodBody() {
2040     this.wasAcr = false;
2041     int found = 1;
2042     try {
2043       while (true) { //loop for jumping over comments
2044         // ---------Consume white space and handles startPosition---------
2045         boolean isWhiteSpace;
2046         do {
2047           startPosition = currentPosition;
2048           currentCharacter = source[currentPosition++];
2049           //          if (((currentCharacter = source[currentPosition++]) == '\\')
2050           //            && (source[currentPosition] == 'u')) {
2051           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
2052           //          } else {
2053           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2054             pushLineSeparator();
2055           isWhiteSpace = Character.isWhitespace(currentCharacter);
2056           //          }
2057         } while (isWhiteSpace);
2058         // -------consume token until } is found---------
2059         switch (currentCharacter) {
2060         case '{':
2061           found++;
2062           break;
2063         case '}':
2064           found--;
2065           if (found == 0)
2066             return;
2067           break;
2068         case '\'': {
2069           boolean test;
2070           test = getNextChar('\\');
2071           if (test) {
2072             try {
2073               scanDoubleQuotedEscapeCharacter();
2074             } catch (InvalidInputException ex) {
2075             }
2076             ;
2077           } else {
2078             //                try { // consume next character
2079             unicodeAsBackSlash = false;
2080             currentCharacter = source[currentPosition++];
2081             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2082             //                    && (source[currentPosition] == 'u')) {
2083             //                    getNextUnicodeChar();
2084             //                  } else {
2085             if (withoutUnicodePtr != 0) {
2086               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2087             }
2088             //                  }
2089             //                } catch (InvalidInputException ex) {
2090             //                };
2091           }
2092           getNextChar('\'');
2093           break;
2094         }
2095         case '"':
2096           try {
2097             //              try { // consume next character
2098             unicodeAsBackSlash = false;
2099             currentCharacter = source[currentPosition++];
2100             //                if (((currentCharacter = source[currentPosition++]) == '\\')
2101             //                  && (source[currentPosition] == 'u')) {
2102             //                  getNextUnicodeChar();
2103             //                } else {
2104             if (withoutUnicodePtr != 0) {
2105               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2106             }
2107             //                }
2108             //              } catch (InvalidInputException ex) {
2109             //              };
2110             while (currentCharacter != '"') {
2111               if (currentCharacter == '\r') {
2112                 if (source[currentPosition] == '\n')
2113                   currentPosition++;
2114                 break;
2115                 // the string cannot go further that the line
2116               }
2117               if (currentCharacter == '\n') {
2118                 break;
2119                 // the string cannot go further that the line
2120               }
2121               if (currentCharacter == '\\') {
2122                 try {
2123                   scanDoubleQuotedEscapeCharacter();
2124                 } catch (InvalidInputException ex) {
2125                 }
2126                 ;
2127               }
2128               //                try { // consume next character
2129               unicodeAsBackSlash = false;
2130               currentCharacter = source[currentPosition++];
2131               //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2132               //                    && (source[currentPosition] == 'u')) {
2133               //                    getNextUnicodeChar();
2134               //                  } else {
2135               if (withoutUnicodePtr != 0) {
2136                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2137               }
2138               //                  }
2139               //                } catch (InvalidInputException ex) {
2140               //                };
2141             }
2142           } catch (IndexOutOfBoundsException e) {
2143             return;
2144           }
2145           break;
2146         case '/': {
2147           int test;
2148           if ((test = getNextChar('/', '*')) == 0) {
2149             //line comment
2150             try {
2151               //get the next char
2152               currentCharacter = source[currentPosition++];
2153               //                  if (((currentCharacter = source[currentPosition++]) ==
2154               // '\\')
2155               //                    && (source[currentPosition] == 'u')) {
2156               //                    //-------------unicode traitement ------------
2157               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2158               //                    currentPosition++;
2159               //                    while (source[currentPosition] == 'u') {
2160               //                      currentPosition++;
2161               //                    }
2162               //                    if ((c1 =
2163               //                      Character.getNumericValue(source[currentPosition++]))
2164               //                      > 15
2165               //                      || c1 < 0
2166               //                      || (c2 =
2167               //                        Character.getNumericValue(source[currentPosition++]))
2168               //                        > 15
2169               //                      || c2 < 0
2170               //                      || (c3 =
2171               //                        Character.getNumericValue(source[currentPosition++]))
2172               //                        > 15
2173               //                      || c3 < 0
2174               //                      || (c4 =
2175               //                        Character.getNumericValue(source[currentPosition++]))
2176               //                        > 15
2177               //                      || c4 < 0) {
2178               //                      //error don't care of the value
2179               //                      currentCharacter = 'A';
2180               //                    } //something different from \n and \r
2181               //                    else {
2182               //                      currentCharacter =
2183               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2184               //                    }
2185               //                  }
2186               while (currentCharacter != '\r' && currentCharacter != '\n') {
2187                 //get the next char
2188                 currentCharacter = source[currentPosition++];
2189                 //                    if (((currentCharacter = source[currentPosition++])
2190                 //                      == '\\')
2191                 //                      && (source[currentPosition] == 'u')) {
2192                 //                      //-------------unicode traitement ------------
2193                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2194                 //                      currentPosition++;
2195                 //                      while (source[currentPosition] == 'u') {
2196                 //                        currentPosition++;
2197                 //                      }
2198                 //                      if ((c1 =
2199                 //                        Character.getNumericValue(source[currentPosition++]))
2200                 //                        > 15
2201                 //                        || c1 < 0
2202                 //                        || (c2 =
2203                 //                          Character.getNumericValue(source[currentPosition++]))
2204                 //                          > 15
2205                 //                        || c2 < 0
2206                 //                        || (c3 =
2207                 //                          Character.getNumericValue(source[currentPosition++]))
2208                 //                          > 15
2209                 //                        || c3 < 0
2210                 //                        || (c4 =
2211                 //                          Character.getNumericValue(source[currentPosition++]))
2212                 //                          > 15
2213                 //                        || c4 < 0) {
2214                 //                        //error don't care of the value
2215                 //                        currentCharacter = 'A';
2216                 //                      } //something different from \n and \r
2217                 //                      else {
2218                 //                        currentCharacter =
2219                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2220                 //                      }
2221                 //                    }
2222               }
2223               if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2224                 pushLineSeparator();
2225             } catch (IndexOutOfBoundsException e) {
2226             } //an eof will them be generated
2227             break;
2228           }
2229           if (test > 0) {
2230             //traditional and annotation comment
2231             boolean star = false;
2232             //                try { // consume next character
2233             unicodeAsBackSlash = false;
2234             currentCharacter = source[currentPosition++];
2235             //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2236             //                    && (source[currentPosition] == 'u')) {
2237             //                    getNextUnicodeChar();
2238             //                  } else {
2239             if (withoutUnicodePtr != 0) {
2240               withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2241             }
2242             //                  };
2243             //                } catch (InvalidInputException ex) {
2244             //                };
2245             if (currentCharacter == '*') {
2246               star = true;
2247             }
2248             if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2249               pushLineSeparator();
2250             try { //get the next char
2251               currentCharacter = source[currentPosition++];
2252               //                  if (((currentCharacter = source[currentPosition++]) ==
2253               // '\\')
2254               //                    && (source[currentPosition] == 'u')) {
2255               //                    //-------------unicode traitement ------------
2256               //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2257               //                    currentPosition++;
2258               //                    while (source[currentPosition] == 'u') {
2259               //                      currentPosition++;
2260               //                    }
2261               //                    if ((c1 =
2262               //                      Character.getNumericValue(source[currentPosition++]))
2263               //                      > 15
2264               //                      || c1 < 0
2265               //                      || (c2 =
2266               //                        Character.getNumericValue(source[currentPosition++]))
2267               //                        > 15
2268               //                      || c2 < 0
2269               //                      || (c3 =
2270               //                        Character.getNumericValue(source[currentPosition++]))
2271               //                        > 15
2272               //                      || c3 < 0
2273               //                      || (c4 =
2274               //                        Character.getNumericValue(source[currentPosition++]))
2275               //                        > 15
2276               //                      || c4 < 0) {
2277               //                      //error don't care of the value
2278               //                      currentCharacter = 'A';
2279               //                    } //something different from * and /
2280               //                    else {
2281               //                      currentCharacter =
2282               //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2283               //                    }
2284               //                  }
2285               //loop until end of comment */
2286               while ((currentCharacter != '/') || (!star)) {
2287                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2288                   pushLineSeparator();
2289                 star = currentCharacter == '*';
2290                 //get next char
2291                 currentCharacter = source[currentPosition++];
2292                 //                    if (((currentCharacter = source[currentPosition++])
2293                 //                      == '\\')
2294                 //                      && (source[currentPosition] == 'u')) {
2295                 //                      //-------------unicode traitement ------------
2296                 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2297                 //                      currentPosition++;
2298                 //                      while (source[currentPosition] == 'u') {
2299                 //                        currentPosition++;
2300                 //                      }
2301                 //                      if ((c1 =
2302                 //                        Character.getNumericValue(source[currentPosition++]))
2303                 //                        > 15
2304                 //                        || c1 < 0
2305                 //                        || (c2 =
2306                 //                          Character.getNumericValue(source[currentPosition++]))
2307                 //                          > 15
2308                 //                        || c2 < 0
2309                 //                        || (c3 =
2310                 //                          Character.getNumericValue(source[currentPosition++]))
2311                 //                          > 15
2312                 //                        || c3 < 0
2313                 //                        || (c4 =
2314                 //                          Character.getNumericValue(source[currentPosition++]))
2315                 //                          > 15
2316                 //                        || c4 < 0) {
2317                 //                        //error don't care of the value
2318                 //                        currentCharacter = 'A';
2319                 //                      } //something different from * and /
2320                 //                      else {
2321                 //                        currentCharacter =
2322                 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2323                 //                      }
2324                 //                    }
2325               }
2326             } catch (IndexOutOfBoundsException e) {
2327               return;
2328             }
2329             break;
2330           }
2331           break;
2332         }
2333         default:
2334           if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2335             try {
2336               scanIdentifierOrKeyword((currentCharacter == '$'));
2337             } catch (InvalidInputException ex) {
2338             }
2339             ;
2340             break;
2341           }
2342           if (Character.isDigit(currentCharacter)) {
2343             try {
2344               scanNumber(false);
2345             } catch (InvalidInputException ex) {
2346             }
2347             ;
2348             break;
2349           }
2350         }
2351       }
2352       //-----------------end switch while try--------------------
2353     } catch (IndexOutOfBoundsException e) {
2354     } catch (InvalidInputException e) {
2355     }
2356     return;
2357   }
2358
2359   //  public final boolean jumpOverUnicodeWhiteSpace()
2360   //    throws InvalidInputException {
2361   //    //BOOLEAN
2362   //    //handle the case of unicode. Jump over the next whiteSpace
2363   //    //making startPosition pointing on the next available char
2364   //    //On false, the currentCharacter is filled up with a potential
2365   //    //correct char
2366   //
2367   //    try {
2368   //      this.wasAcr = false;
2369   //      int c1, c2, c3, c4;
2370   //      int unicodeSize = 6;
2371   //      currentPosition++;
2372   //      while (source[currentPosition] == 'u') {
2373   //        currentPosition++;
2374   //        unicodeSize++;
2375   //      }
2376   //
2377   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2378   //        || c1 < 0)
2379   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2380   //          || c2 < 0)
2381   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2382   //          || c3 < 0)
2383   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2384   //          || c4 < 0)) {
2385   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2386   //      }
2387   //
2388   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2389   //      if (recordLineSeparator
2390   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2391   //        pushLineSeparator();
2392   //      if (Character.isWhitespace(currentCharacter))
2393   //        return true;
2394   //
2395   //      //buffer the new char which is not a white space
2396   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2397   //      //withoutUnicodePtr == 1 is true here
2398   //      return false;
2399   //    } catch (IndexOutOfBoundsException e) {
2400   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2401   //    }
2402   //  }
2403   public final int[] getLineEnds() {
2404     //return a bounded copy of this.lineEnds
2405     int[] copy;
2406     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2407     return copy;
2408   }
2409
2410   public char[] getSource() {
2411     return this.source;
2412   }
2413
2414   public static boolean isIdentifierOrKeyword(int token) {
2415     return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2416   }
2417
2418   final char[] optimizedCurrentTokenSource1() {
2419     //return always the same char[] build only once
2420     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2421     char charOne = source[startPosition];
2422     switch (charOne) {
2423     case 'a':
2424       return charArray_a;
2425     case 'b':
2426       return charArray_b;
2427     case 'c':
2428       return charArray_c;
2429     case 'd':
2430       return charArray_d;
2431     case 'e':
2432       return charArray_e;
2433     case 'f':
2434       return charArray_f;
2435     case 'g':
2436       return charArray_g;
2437     case 'h':
2438       return charArray_h;
2439     case 'i':
2440       return charArray_i;
2441     case 'j':
2442       return charArray_j;
2443     case 'k':
2444       return charArray_k;
2445     case 'l':
2446       return charArray_l;
2447     case 'm':
2448       return charArray_m;
2449     case 'n':
2450       return charArray_n;
2451     case 'o':
2452       return charArray_o;
2453     case 'p':
2454       return charArray_p;
2455     case 'q':
2456       return charArray_q;
2457     case 'r':
2458       return charArray_r;
2459     case 's':
2460       return charArray_s;
2461     case 't':
2462       return charArray_t;
2463     case 'u':
2464       return charArray_u;
2465     case 'v':
2466       return charArray_v;
2467     case 'w':
2468       return charArray_w;
2469     case 'x':
2470       return charArray_x;
2471     case 'y':
2472       return charArray_y;
2473     case 'z':
2474       return charArray_z;
2475     default:
2476       return new char[] { charOne };
2477     }
2478   }
2479
2480   final char[] optimizedCurrentTokenSource2() {
2481     char c0, c1;
2482     c0 = source[startPosition];
2483     c1 = source[startPosition + 1];
2484     if (c0 == '$') {
2485       //return always the same char[] build only once
2486       //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2487       switch (c1) {
2488       case 'a':
2489         return charArray_va;
2490       case 'b':
2491         return charArray_vb;
2492       case 'c':
2493         return charArray_vc;
2494       case 'd':
2495         return charArray_vd;
2496       case 'e':
2497         return charArray_ve;
2498       case 'f':
2499         return charArray_vf;
2500       case 'g':
2501         return charArray_vg;
2502       case 'h':
2503         return charArray_vh;
2504       case 'i':
2505         return charArray_vi;
2506       case 'j':
2507         return charArray_vj;
2508       case 'k':
2509         return charArray_vk;
2510       case 'l':
2511         return charArray_vl;
2512       case 'm':
2513         return charArray_vm;
2514       case 'n':
2515         return charArray_vn;
2516       case 'o':
2517         return charArray_vo;
2518       case 'p':
2519         return charArray_vp;
2520       case 'q':
2521         return charArray_vq;
2522       case 'r':
2523         return charArray_vr;
2524       case 's':
2525         return charArray_vs;
2526       case 't':
2527         return charArray_vt;
2528       case 'u':
2529         return charArray_vu;
2530       case 'v':
2531         return charArray_vv;
2532       case 'w':
2533         return charArray_vw;
2534       case 'x':
2535         return charArray_vx;
2536       case 'y':
2537         return charArray_vy;
2538       case 'z':
2539         return charArray_vz;
2540       }
2541     }
2542     //try to return the same char[] build only once
2543     int hash = ((c0 << 6) + c1) % TableSize;
2544     char[][] table = charArray_length[0][hash];
2545     int i = newEntry2;
2546     while (++i < InternalTableSize) {
2547       char[] charArray = table[i];
2548       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2549         return charArray;
2550     }
2551     //---------other side---------
2552     i = -1;
2553     int max = newEntry2;
2554     while (++i <= max) {
2555       char[] charArray = table[i];
2556       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2557         return charArray;
2558     }
2559     //--------add the entry-------
2560     if (++max >= InternalTableSize)
2561       max = 0;
2562     char[] r;
2563     table[max] = (r = new char[] { c0, c1 });
2564     newEntry2 = max;
2565     return r;
2566   }
2567
2568   final char[] optimizedCurrentTokenSource3() {
2569     //try to return the same char[] build only once
2570     char c0, c1, c2;
2571     int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2572         % TableSize;
2573     char[][] table = charArray_length[1][hash];
2574     int i = newEntry3;
2575     while (++i < InternalTableSize) {
2576       char[] charArray = table[i];
2577       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2578         return charArray;
2579     }
2580     //---------other side---------
2581     i = -1;
2582     int max = newEntry3;
2583     while (++i <= max) {
2584       char[] charArray = table[i];
2585       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2586         return charArray;
2587     }
2588     //--------add the entry-------
2589     if (++max >= InternalTableSize)
2590       max = 0;
2591     char[] r;
2592     table[max] = (r = new char[] { c0, c1, c2 });
2593     newEntry3 = max;
2594     return r;
2595   }
2596
2597   final char[] optimizedCurrentTokenSource4() {
2598     //try to return the same char[] build only once
2599     char c0, c1, c2, c3;
2600     long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2601         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2602         % TableSize;
2603     char[][] table = charArray_length[2][(int) hash];
2604     int i = newEntry4;
2605     while (++i < InternalTableSize) {
2606       char[] charArray = table[i];
2607       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2608         return charArray;
2609     }
2610     //---------other side---------
2611     i = -1;
2612     int max = newEntry4;
2613     while (++i <= max) {
2614       char[] charArray = table[i];
2615       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2616         return charArray;
2617     }
2618     //--------add the entry-------
2619     if (++max >= InternalTableSize)
2620       max = 0;
2621     char[] r;
2622     table[max] = (r = new char[] { c0, c1, c2, c3 });
2623     newEntry4 = max;
2624     return r;
2625   }
2626
2627   final char[] optimizedCurrentTokenSource5() {
2628     //try to return the same char[] build only once
2629     char c0, c1, c2, c3, c4;
2630     long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2631         + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2632         % TableSize;
2633     char[][] table = charArray_length[3][(int) hash];
2634     int i = newEntry5;
2635     while (++i < InternalTableSize) {
2636       char[] charArray = table[i];
2637       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2638         return charArray;
2639     }
2640     //---------other side---------
2641     i = -1;
2642     int max = newEntry5;
2643     while (++i <= max) {
2644       char[] charArray = table[i];
2645       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2646         return charArray;
2647     }
2648     //--------add the entry-------
2649     if (++max >= InternalTableSize)
2650       max = 0;
2651     char[] r;
2652     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2653     newEntry5 = max;
2654     return r;
2655   }
2656
2657   final char[] optimizedCurrentTokenSource6() {
2658     //try to return the same char[] build only once
2659     char c0, c1, c2, c3, c4, c5;
2660     long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2661         + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2662         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2663         % TableSize;
2664     char[][] table = charArray_length[4][(int) hash];
2665     int i = newEntry6;
2666     while (++i < InternalTableSize) {
2667       char[] charArray = table[i];
2668       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2669           && (c5 == charArray[5]))
2670         return charArray;
2671     }
2672     //---------other side---------
2673     i = -1;
2674     int max = newEntry6;
2675     while (++i <= max) {
2676       char[] charArray = table[i];
2677       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2678           && (c5 == charArray[5]))
2679         return charArray;
2680     }
2681     //--------add the entry-------
2682     if (++max >= InternalTableSize)
2683       max = 0;
2684     char[] r;
2685     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2686     newEntry6 = max;
2687     return r;
2688   }
2689
2690   public final void pushLineSeparator() throws InvalidInputException {
2691     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2692     final int INCREMENT = 250;
2693     if (this.checkNonExternalizedStringLiterals) {
2694       // reinitialize the current line for non externalize strings purpose
2695       currentLine = null;
2696     }
2697     //currentCharacter is at position currentPosition-1
2698     // cr 000D
2699     if (currentCharacter == '\r') {
2700       int separatorPos = currentPosition - 1;
2701       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2702         return;
2703       //System.out.println("CR-" + separatorPos);
2704       try {
2705         lineEnds[++linePtr] = separatorPos;
2706       } catch (IndexOutOfBoundsException e) {
2707         //linePtr value is correct
2708         int oldLength = lineEnds.length;
2709         int[] old = lineEnds;
2710         lineEnds = new int[oldLength + INCREMENT];
2711         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2712         lineEnds[linePtr] = separatorPos;
2713       }
2714       // look-ahead for merged cr+lf
2715       try {
2716         if (source[currentPosition] == '\n') {
2717           //System.out.println("look-ahead LF-" + currentPosition);
2718           lineEnds[linePtr] = currentPosition;
2719           currentPosition++;
2720           wasAcr = false;
2721         } else {
2722           wasAcr = true;
2723         }
2724       } catch (IndexOutOfBoundsException e) {
2725         wasAcr = true;
2726       }
2727     } else {
2728       // lf 000A
2729       if (currentCharacter == '\n') {
2730         //must merge eventual cr followed by lf
2731         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2732           //System.out.println("merge LF-" + (currentPosition - 1));
2733           lineEnds[linePtr] = currentPosition - 1;
2734         } else {
2735           int separatorPos = currentPosition - 1;
2736           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2737             return;
2738           // System.out.println("LF-" + separatorPos);
2739           try {
2740             lineEnds[++linePtr] = separatorPos;
2741           } catch (IndexOutOfBoundsException e) {
2742             //linePtr value is correct
2743             int oldLength = lineEnds.length;
2744             int[] old = lineEnds;
2745             lineEnds = new int[oldLength + INCREMENT];
2746             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2747             lineEnds[linePtr] = separatorPos;
2748           }
2749         }
2750         wasAcr = false;
2751       }
2752     }
2753   }
2754
2755   public final void pushUnicodeLineSeparator() {
2756     // isUnicode means that the \r or \n has been read as a unicode character
2757     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2758     final int INCREMENT = 250;
2759     //currentCharacter is at position currentPosition-1
2760     if (this.checkNonExternalizedStringLiterals) {
2761       // reinitialize the current line for non externalize strings purpose
2762       currentLine = null;
2763     }
2764     // cr 000D
2765     if (currentCharacter == '\r') {
2766       int separatorPos = currentPosition - 6;
2767       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2768         return;
2769       //System.out.println("CR-" + separatorPos);
2770       try {
2771         lineEnds[++linePtr] = separatorPos;
2772       } catch (IndexOutOfBoundsException e) {
2773         //linePtr value is correct
2774         int oldLength = lineEnds.length;
2775         int[] old = lineEnds;
2776         lineEnds = new int[oldLength + INCREMENT];
2777         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2778         lineEnds[linePtr] = separatorPos;
2779       }
2780       // look-ahead for merged cr+lf
2781       if (source[currentPosition] == '\n') {
2782         //System.out.println("look-ahead LF-" + currentPosition);
2783         lineEnds[linePtr] = currentPosition;
2784         currentPosition++;
2785         wasAcr = false;
2786       } else {
2787         wasAcr = true;
2788       }
2789     } else {
2790       // lf 000A
2791       if (currentCharacter == '\n') {
2792         //must merge eventual cr followed by lf
2793         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2794           //System.out.println("merge LF-" + (currentPosition - 1));
2795           lineEnds[linePtr] = currentPosition - 6;
2796         } else {
2797           int separatorPos = currentPosition - 6;
2798           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2799             return;
2800           // System.out.println("LF-" + separatorPos);
2801           try {
2802             lineEnds[++linePtr] = separatorPos;
2803           } catch (IndexOutOfBoundsException e) {
2804             //linePtr value is correct
2805             int oldLength = lineEnds.length;
2806             int[] old = lineEnds;
2807             lineEnds = new int[oldLength + INCREMENT];
2808             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2809             lineEnds[linePtr] = separatorPos;
2810           }
2811         }
2812         wasAcr = false;
2813       }
2814     }
2815   }
2816
2817   public void recordComment(int token) {
2818     // compute position
2819     int stopPosition = this.currentPosition;
2820     switch (token) {
2821     case TokenNameCOMMENT_LINE:
2822       stopPosition = -this.lastCommentLinePosition;
2823       break;
2824     case TokenNameCOMMENT_BLOCK:
2825       stopPosition = -this.currentPosition;
2826       break;
2827     }
2828
2829     // a new comment is recorded
2830     int length = this.commentStops.length;
2831     if (++this.commentPtr >= length) {
2832       System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2833       //grows the positions buffers too
2834       System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2835     }
2836     this.commentStops[this.commentPtr] = stopPosition;
2837     this.commentStarts[this.commentPtr] = this.startPosition;
2838   }
2839
2840   //  public final void recordComment(boolean isJavadoc) {
2841   //    // a new annotation comment is recorded
2842   //    try {
2843   //      commentStops[++commentPtr] = isJavadoc
2844   //          ? currentPosition
2845   //          : -currentPosition;
2846   //    } catch (IndexOutOfBoundsException e) {
2847   //      int oldStackLength = commentStops.length;
2848   //      int[] oldStack = commentStops;
2849   //      commentStops = new int[oldStackLength + 30];
2850   //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2851   //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2852   //      //grows the positions buffers too
2853   //      int[] old = commentStarts;
2854   //      commentStarts = new int[oldStackLength + 30];
2855   //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2856   //    }
2857   //    //the buffer is of a correct size here
2858   //    commentStarts[commentPtr] = startPosition;
2859   //  }
2860   public void resetTo(int begin, int end) {
2861     //reset the scanner to a given position where it may rescan again
2862     diet = false;
2863     initialPosition = startPosition = currentPosition = begin;
2864     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2865     commentPtr = -1; // reset comment stack
2866   }
2867
2868   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2869     // the string with "\\u" is a legal string of two chars \ and u
2870     //thus we use a direct access to the source (for regular cases).
2871     //    if (unicodeAsBackSlash) {
2872     //      // consume next character
2873     //      unicodeAsBackSlash = false;
2874     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2875     //        && (source[currentPosition] == 'u')) {
2876     //        getNextUnicodeChar();
2877     //      } else {
2878     //        if (withoutUnicodePtr != 0) {
2879     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2880     //        }
2881     //      }
2882     //    } else
2883     currentCharacter = source[currentPosition++];
2884     switch (currentCharacter) {
2885     case '\'':
2886       currentCharacter = '\'';
2887       break;
2888     case '\\':
2889       currentCharacter = '\\';
2890       break;
2891     default:
2892       currentCharacter = '\\';
2893       currentPosition--;
2894     }
2895   }
2896
2897   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2898     currentCharacter = source[currentPosition++];
2899     switch (currentCharacter) {
2900     //      case 'b' :
2901     //        currentCharacter = '\b';
2902     //        break;
2903     case 't':
2904       currentCharacter = '\t';
2905       break;
2906     case 'n':
2907       currentCharacter = '\n';
2908       break;
2909     //      case 'f' :
2910     //        currentCharacter = '\f';
2911     //        break;
2912     case 'r':
2913       currentCharacter = '\r';
2914       break;
2915     case '\"':
2916       currentCharacter = '\"';
2917       break;
2918     case '\'':
2919       currentCharacter = '\'';
2920       break;
2921     case '\\':
2922       currentCharacter = '\\';
2923       break;
2924     case '$':
2925       currentCharacter = '$';
2926       break;
2927     default:
2928       // -----------octal escape--------------
2929       // OctalDigit
2930       // OctalDigit OctalDigit
2931       // ZeroToThree OctalDigit OctalDigit
2932       int number = Character.getNumericValue(currentCharacter);
2933       if (number >= 0 && number <= 7) {
2934         boolean zeroToThreeNot = number > 3;
2935         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2936           int digit = Character.getNumericValue(currentCharacter);
2937           if (digit >= 0 && digit <= 7) {
2938             number = (number * 8) + digit;
2939             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2940               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2941                 // Digit --> ignore last character
2942                 currentPosition--;
2943               } else {
2944                 digit = Character.getNumericValue(currentCharacter);
2945                 if (digit >= 0 && digit <= 7) {
2946                   // has read \ZeroToThree OctalDigit OctalDigit
2947                   number = (number * 8) + digit;
2948                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2949                   // --> ignore last character
2950                   currentPosition--;
2951                 }
2952               }
2953             } else { // has read \OctalDigit NonDigit--> ignore last
2954               // character
2955               currentPosition--;
2956             }
2957           } else { // has read \OctalDigit NonOctalDigit--> ignore last
2958             // character
2959             currentPosition--;
2960           }
2961         } else { // has read \OctalDigit --> ignore last character
2962           currentPosition--;
2963         }
2964         if (number > 255)
2965           throw new InvalidInputException(INVALID_ESCAPE);
2966         currentCharacter = (char) number;
2967       }
2968     //else
2969     //     throw new InvalidInputException(INVALID_ESCAPE);
2970     }
2971   }
2972
2973   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2974   //    return scanIdentifierOrKeyword( false );
2975   //  }
2976   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2977     //test keywords
2978     //first dispatch on the first char.
2979     //then the length. If there are several
2980     //keywors with the same length AND the same first char, then do another
2981     //disptach on the second char :-)...cool....but fast !
2982     useAssertAsAnIndentifier = false;
2983     while (getNextCharAsJavaIdentifierPart()) {
2984     }
2985     ;
2986     if (isVariable) {
2987       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2988       //        return TokenNamethis;
2989       //      }
2990       return TokenNameVariable;
2991     }
2992     int index, length;
2993     char[] data;
2994     char firstLetter;
2995     //    if (withoutUnicodePtr == 0)
2996     //quick test on length == 1 but not on length > 12 while most identifier
2997     //have a length which is <= 12...but there are lots of identifier with
2998     //only one char....
2999     //      {
3000     if ((length = currentPosition - startPosition) == 1)
3001       return TokenNameIdentifier;
3002     //  data = source;
3003     data = new char[length];
3004     index = startPosition;
3005     for (int i = 0; i < length; i++) {
3006       data[i] = Character.toLowerCase(source[index + i]);
3007     }
3008     index = 0;
3009     //    } else {
3010     //      if ((length = withoutUnicodePtr) == 1)
3011     //        return TokenNameIdentifier;
3012     //      // data = withoutUnicodeBuffer;
3013     //      data = new char[withoutUnicodeBuffer.length];
3014     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3015     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3016     //      }
3017     //      index = 1;
3018     //    }
3019     firstLetter = data[index];
3020     switch (firstLetter) {
3021     case '_':
3022       switch (length) {
3023       case 8:
3024         //__FILE__
3025         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3026             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3027           return TokenNameFILE;
3028         index = 0; //__LINE__
3029         if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3030             && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3031           return TokenNameLINE;
3032         break;
3033       case 9:
3034         //__CLASS__
3035         if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3036             && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3037           return TokenNameCLASS_C;
3038         break;
3039       case 11:
3040         //__METHOD__
3041         if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3042             && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3043             && (data[++index] == '_'))
3044           return TokenNameMETHOD_C;
3045         break;
3046       case 12:
3047         //__FUNCTION__
3048         if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3049             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3050             && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3051           return TokenNameFUNC_C;
3052         break;
3053       }
3054       return TokenNameIdentifier;
3055     case 'a':
3056       // as and array abstract
3057       switch (length) {
3058       case 2:
3059         //as
3060         if ((data[++index] == 's')) {
3061           return TokenNameas;
3062         } else {
3063           return TokenNameIdentifier;
3064         }
3065       case 3:
3066         //and
3067         if ((data[++index] == 'n') && (data[++index] == 'd')) {
3068           return TokenNameand;
3069         } else {
3070           return TokenNameIdentifier;
3071         }
3072       case 5:
3073         // array
3074         if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3075           return TokenNamearray;
3076         else
3077           return TokenNameIdentifier;
3078       case 8:
3079         if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3080             && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3081           return TokenNameabstract;
3082         else
3083           return TokenNameIdentifier;
3084       default:
3085         return TokenNameIdentifier;
3086       }
3087     case 'b':
3088       //break
3089       switch (length) {
3090       case 5:
3091         if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3092           return TokenNamebreak;
3093         else
3094           return TokenNameIdentifier;
3095       default:
3096         return TokenNameIdentifier;
3097       }
3098     case 'c':
3099       //case catch class clone const continue
3100       switch (length) {
3101       case 4:
3102         if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3103           return TokenNamecase;
3104         else
3105           return TokenNameIdentifier;
3106       case 5:
3107         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3108           return TokenNamecatch;
3109         index = 0;
3110         if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3111           return TokenNameclass;
3112         index = 0;
3113         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3114           return TokenNameclone;
3115         index = 0;
3116         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3117           return TokenNameconst;
3118         else
3119           return TokenNameIdentifier;
3120       case 8:
3121         if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3122             && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3123           return TokenNamecontinue;
3124         else
3125           return TokenNameIdentifier;
3126       default:
3127         return TokenNameIdentifier;
3128       }
3129     case 'd':
3130       // declare default do die
3131       // TODO delete define ==> no keyword !
3132       switch (length) {
3133       case 2:
3134         if ((data[++index] == 'o'))
3135           return TokenNamedo;
3136         else
3137           return TokenNameIdentifier;
3138       //          case 6 :
3139       //            if ((data[++index] == 'e')
3140       //              && (data[++index] == 'f')
3141       //              && (data[++index] == 'i')
3142       //              && (data[++index] == 'n')
3143       //              && (data[++index] == 'e'))
3144       //              return TokenNamedefine;
3145       //            else
3146       //              return TokenNameIdentifier;
3147       case 7:
3148         if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3149             && (data[++index] == 'r') && (data[++index] == 'e'))
3150           return TokenNamedeclare;
3151         index = 0;
3152         if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3153             && (data[++index] == 'l') && (data[++index] == 't'))
3154           return TokenNamedefault;
3155         else
3156           return TokenNameIdentifier;
3157       default:
3158         return TokenNameIdentifier;
3159       }
3160     case 'e':
3161       //echo else exit elseif extends eval
3162       switch (length) {
3163       case 4:
3164         if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3165           return TokenNameecho;
3166         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3167           return TokenNameelse;
3168         else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3169           return TokenNameexit;
3170         else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3171           return TokenNameeval;
3172         else
3173           return TokenNameIdentifier;
3174       case 5:
3175         // endif empty
3176         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3177           return TokenNameendif;
3178         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3179           return TokenNameempty;
3180         else
3181           return TokenNameIdentifier;
3182       case 6:
3183         // endfor
3184         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3185             && (data[++index] == 'r'))
3186           return TokenNameendfor;
3187         else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3188             && (data[++index] == 'f'))
3189           return TokenNameelseif;
3190         else
3191           return TokenNameIdentifier;
3192       case 7:
3193         if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3194             && (data[++index] == 'd') && (data[++index] == 's'))
3195           return TokenNameextends;
3196         else
3197           return TokenNameIdentifier;
3198       case 8:
3199         // endwhile
3200         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3201             && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3202           return TokenNameendwhile;
3203         else
3204           return TokenNameIdentifier;
3205       case 9:
3206         // endswitch
3207         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3208             && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3209           return TokenNameendswitch;
3210         else
3211           return TokenNameIdentifier;
3212       case 10:
3213         // enddeclare
3214         if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3215             && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3216             && (data[++index] == 'e'))
3217           return TokenNameenddeclare;
3218         index = 0;
3219         if ((data[++index] == 'n') // endforeach
3220             && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3221             && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3222           return TokenNameendforeach;
3223         else
3224           return TokenNameIdentifier;
3225       default:
3226         return TokenNameIdentifier;
3227       }
3228     case 'f':
3229       //for false final function
3230       switch (length) {
3231       case 3:
3232         if ((data[++index] == 'o') && (data[++index] == 'r'))
3233           return TokenNamefor;
3234         else
3235           return TokenNameIdentifier;
3236       case 5:
3237         //            if ((data[++index] == 'a') && (data[++index] == 'l')
3238         //                && (data[++index] == 's') && (data[++index] == 'e'))
3239         //              return TokenNamefalse;
3240         if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3241           return TokenNamefinal;
3242         else
3243           return TokenNameIdentifier;
3244       case 7:
3245         // foreach
3246         if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3247             && (data[++index] == 'c') && (data[++index] == 'h'))
3248           return TokenNameforeach;
3249         else
3250           return TokenNameIdentifier;
3251       case 8:
3252         // function
3253         if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3254             && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3255           return TokenNamefunction;
3256         else
3257           return TokenNameIdentifier;
3258       default:
3259         return TokenNameIdentifier;
3260       }
3261     case 'g':
3262       //global
3263       if (length == 6) {
3264         if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3265             && (data[++index] == 'l')) {
3266           return TokenNameglobal;
3267         }
3268       }
3269       return TokenNameIdentifier;
3270     case 'i':
3271       //if int isset include include_once instanceof interface implements
3272       switch (length) {
3273       case 2:
3274         if (data[++index] == 'f')
3275           return TokenNameif;
3276         else
3277           return TokenNameIdentifier;
3278       //          case 3 :
3279       //            if ((data[++index] == 'n') && (data[++index] == 't'))
3280       //              return TokenNameint;
3281       //            else
3282       //              return TokenNameIdentifier;
3283       case 5:
3284         if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3285           return TokenNameisset;
3286         else
3287           return TokenNameIdentifier;
3288       case 7:
3289         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3290             && (data[++index] == 'd') && (data[++index] == 'e'))
3291           return TokenNameinclude;
3292         else
3293           return TokenNameIdentifier;
3294       case 9:
3295         // interface
3296         if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3297             && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3298           return TokenNameinterface;
3299         else
3300           return TokenNameIdentifier;
3301       case 10:
3302         // instanceof
3303         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3304             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3305             && (data[++index] == 'f'))
3306           return TokenNameinstanceof;
3307         if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3308             && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3309             && (data[++index] == 's'))
3310           return TokenNameimplements;
3311         else
3312           return TokenNameIdentifier;
3313       case 12:
3314         if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3315             && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3316             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3317           return TokenNameinclude_once;
3318         else
3319           return TokenNameIdentifier;
3320       default:
3321         return TokenNameIdentifier;
3322       }
3323     case 'l':
3324       //list
3325       if (length == 4) {
3326         if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3327           return TokenNamelist;
3328         }
3329       }
3330       return TokenNameIdentifier;
3331     case 'n':
3332       // new null
3333       switch (length) {
3334       case 3:
3335         if ((data[++index] == 'e') && (data[++index] == 'w'))
3336           return TokenNamenew;
3337         else
3338           return TokenNameIdentifier;
3339       //          case 4 :
3340       //            if ((data[++index] == 'u') && (data[++index] == 'l')
3341       //                && (data[++index] == 'l'))
3342       //              return TokenNamenull;
3343       //            else
3344       //              return TokenNameIdentifier;
3345       default:
3346         return TokenNameIdentifier;
3347       }
3348     case 'o':
3349       // or old_function
3350       if (length == 2) {
3351         if (data[++index] == 'r') {
3352           return TokenNameor;
3353         }
3354       }
3355       //        if (length == 12) {
3356       //          if ((data[++index] == 'l')
3357       //            && (data[++index] == 'd')
3358       //            && (data[++index] == '_')
3359       //            && (data[++index] == 'f')
3360       //            && (data[++index] == 'u')
3361       //            && (data[++index] == 'n')
3362       //            && (data[++index] == 'c')
3363       //            && (data[++index] == 't')
3364       //            && (data[++index] == 'i')
3365       //            && (data[++index] == 'o')
3366       //            && (data[++index] == 'n')) {
3367       //            return TokenNameold_function;
3368       //          }
3369       //        }
3370       return TokenNameIdentifier;
3371     case 'p':
3372       // print public private protected
3373       switch (length) {
3374       case 5:
3375         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3376           return TokenNameprint;
3377         } else
3378           return TokenNameIdentifier;
3379       case 6:
3380         if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3381             && (data[++index] == 'c')) {
3382           return TokenNamepublic;
3383         } else
3384           return TokenNameIdentifier;
3385       case 7:
3386         if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3387             && (data[++index] == 't') && (data[++index] == 'e')) {
3388           return TokenNameprivate;
3389         } else
3390           return TokenNameIdentifier;
3391       case 9:
3392         if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3393             && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3394           return TokenNameprotected;
3395         } else
3396           return TokenNameIdentifier;
3397       }
3398       return TokenNameIdentifier;
3399     case 'r':
3400       //return require require_once
3401       if (length == 6) {
3402         if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3403             && (data[++index] == 'n')) {
3404           return TokenNamereturn;
3405         }
3406       } else if (length == 7) {
3407         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3408             && (data[++index] == 'r') && (data[++index] == 'e')) {
3409           return TokenNamerequire;
3410         }
3411       } else if (length == 12) {
3412         if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3413             && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3414             && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3415           return TokenNamerequire_once;
3416         }
3417       } else
3418         return TokenNameIdentifier;
3419     case 's':
3420       //static switch
3421       switch (length) {
3422       case 6:
3423         if (data[++index] == 't')
3424           if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3425             return TokenNamestatic;
3426           } else
3427             return TokenNameIdentifier;
3428         else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3429             && (data[++index] == 'h'))
3430           return TokenNameswitch;
3431         else
3432           return TokenNameIdentifier;
3433       default:
3434         return TokenNameIdentifier;
3435       }
3436     case 't':
3437       // try true throw
3438       switch (length) {
3439       case 3:
3440         if ((data[++index] == 'r') && (data[++index] == 'y'))
3441           return TokenNametry;
3442         else
3443           return TokenNameIdentifier;
3444       //          case 4 :
3445       //            if ((data[++index] == 'r') && (data[++index] == 'u')
3446       //                && (data[++index] == 'e'))
3447       //              return TokenNametrue;
3448       //            else
3449       //              return TokenNameIdentifier;
3450       case 5:
3451         if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3452           return TokenNamethrow;
3453         else
3454           return TokenNameIdentifier;
3455       default:
3456         return TokenNameIdentifier;
3457       }
3458     case 'u':
3459       //use unset
3460       switch (length) {
3461       case 3:
3462         if ((data[++index] == 's') && (data[++index] == 'e'))
3463           return TokenNameuse;
3464         else
3465           return TokenNameIdentifier;
3466       case 5:
3467         if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3468           return TokenNameunset;
3469         else
3470           return TokenNameIdentifier;
3471       default:
3472         return TokenNameIdentifier;
3473       }
3474     case 'v':
3475       //var
3476       switch (length) {
3477       case 3:
3478         if ((data[++index] == 'a') && (data[++index] == 'r'))
3479           return TokenNamevar;
3480         else
3481           return TokenNameIdentifier;
3482       default:
3483         return TokenNameIdentifier;
3484       }
3485     case 'w':
3486       //while
3487       switch (length) {
3488       case 5:
3489         if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3490           return TokenNamewhile;
3491         else
3492           return TokenNameIdentifier;
3493       //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3494       // (data[++index]=='e') && (data[++index]=='f')&&
3495       // (data[++index]=='p'))
3496       //return TokenNamewidefp ;
3497       //else
3498       //return TokenNameIdentifier;
3499       default:
3500         return TokenNameIdentifier;
3501       }
3502     case 'x':
3503       //xor
3504       switch (length) {
3505       case 3:
3506         if ((data[++index] == 'o') && (data[++index] == 'r'))
3507           return TokenNamexor;
3508         else
3509           return TokenNameIdentifier;
3510       default:
3511         return TokenNameIdentifier;
3512       }
3513     default:
3514       return TokenNameIdentifier;
3515     }
3516   }
3517
3518   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3519     //when entering this method the currentCharacter is the firt
3520     //digit of the number , i.e. it may be preceeded by a . when
3521     //dotPrefix is true
3522     boolean floating = dotPrefix;
3523     if ((!dotPrefix) && (currentCharacter == '0')) {
3524       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3525         //force the first char of the hexa number do exist...
3526         // consume next character
3527         unicodeAsBackSlash = false;
3528         currentCharacter = source[currentPosition++];
3529         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3530         //          && (source[currentPosition] == 'u')) {
3531         //          getNextUnicodeChar();
3532         //        } else {
3533         //          if (withoutUnicodePtr != 0) {
3534         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3535         //          }
3536         //        }
3537         if (Character.digit(currentCharacter, 16) == -1)
3538           throw new InvalidInputException(INVALID_HEXA);
3539         //---end forcing--
3540         while (getNextCharAsDigit(16)) {
3541         }
3542         ;
3543         //        if (getNextChar('l', 'L') >= 0)
3544         //          return TokenNameLongLiteral;
3545         //        else
3546         return TokenNameIntegerLiteral;
3547       }
3548       //there is x or X in the number
3549       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3550       // 00078.0 is true !!!!! crazy language
3551       if (getNextCharAsDigit()) {
3552         //-------------potential octal-----------------
3553         while (getNextCharAsDigit()) {
3554         }
3555         ;
3556         //        if (getNextChar('l', 'L') >= 0) {
3557         //          return TokenNameLongLiteral;
3558         //        }
3559         //
3560         //        if (getNextChar('f', 'F') >= 0) {
3561         //          return TokenNameFloatingPointLiteral;
3562         //        }
3563         if (getNextChar('d', 'D') >= 0) {
3564           return TokenNameDoubleLiteral;
3565         } else { //make the distinction between octal and float ....
3566           if (getNextChar('.')) { //bingo ! ....
3567             while (getNextCharAsDigit()) {
3568             }
3569             ;
3570             if (getNextChar('e', 'E') >= 0) {
3571               // consume next character
3572               unicodeAsBackSlash = false;
3573               currentCharacter = source[currentPosition++];
3574               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3575               //                && (source[currentPosition] == 'u')) {
3576               //                getNextUnicodeChar();
3577               //              } else {
3578               //                if (withoutUnicodePtr != 0) {
3579               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3580               //                }
3581               //              }
3582               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3583                 // consume next character
3584                 unicodeAsBackSlash = false;
3585                 currentCharacter = source[currentPosition++];
3586                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3587                 //                  && (source[currentPosition] == 'u')) {
3588                 //                  getNextUnicodeChar();
3589                 //                } else {
3590                 //                  if (withoutUnicodePtr != 0) {
3591                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3592                 //                      currentCharacter;
3593                 //                  }
3594                 //                }
3595               }
3596               if (!Character.isDigit(currentCharacter))
3597                 throw new InvalidInputException(INVALID_FLOAT);
3598               while (getNextCharAsDigit()) {
3599               }
3600               ;
3601             }
3602             //            if (getNextChar('f', 'F') >= 0)
3603             //              return TokenNameFloatingPointLiteral;
3604             getNextChar('d', 'D'); //jump over potential d or D
3605             return TokenNameDoubleLiteral;
3606           } else {
3607             return TokenNameIntegerLiteral;
3608           }
3609         }
3610       } else {
3611         /* carry on */
3612       }
3613     }
3614     while (getNextCharAsDigit()) {
3615     }
3616     ;
3617     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3618     //      return TokenNameLongLiteral;
3619     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3620       while (getNextCharAsDigit()) {
3621       }
3622       ;
3623       floating = true;
3624     }
3625     //if floating is true both exponant and suffix may be optional
3626     if (getNextChar('e', 'E') >= 0) {
3627       floating = true;
3628       // consume next character
3629       unicodeAsBackSlash = false;
3630       currentCharacter = source[currentPosition++];
3631       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3632       //        && (source[currentPosition] == 'u')) {
3633       //        getNextUnicodeChar();
3634       //      } else {
3635       //        if (withoutUnicodePtr != 0) {
3636       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3637       //        }
3638       //      }
3639       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3640         // next
3641         // character
3642         unicodeAsBackSlash = false;
3643         currentCharacter = source[currentPosition++];
3644         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3645         //          && (source[currentPosition] == 'u')) {
3646         //          getNextUnicodeChar();
3647         //        } else {
3648         //          if (withoutUnicodePtr != 0) {
3649         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3650         //          }
3651         //        }
3652       }
3653       if (!Character.isDigit(currentCharacter))
3654         throw new InvalidInputException(INVALID_FLOAT);
3655       while (getNextCharAsDigit()) {
3656       }
3657       ;
3658     }
3659     if (getNextChar('d', 'D') >= 0)
3660       return TokenNameDoubleLiteral;
3661     //    if (getNextChar('f', 'F') >= 0)
3662     //      return TokenNameFloatingPointLiteral;
3663     //the long flag has been tested before
3664     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3665   }
3666
3667   /**
3668    * Search the line number corresponding to a specific position
3669    *
3670    */
3671   public final int getLineNumber(int position) {
3672     if (lineEnds == null)
3673       return 1;
3674     int length = linePtr + 1;
3675     if (length == 0)
3676       return 1;
3677     int g = 0, d = length - 1;
3678     int m = 0;
3679     while (g <= d) {
3680       m = (g + d) / 2;
3681       if (position < lineEnds[m]) {
3682         d = m - 1;
3683       } else if (position > lineEnds[m]) {
3684         g = m + 1;
3685       } else {
3686         return m + 1;
3687       }
3688     }
3689     if (position < lineEnds[m]) {
3690       return m + 1;
3691     }
3692     return m + 2;
3693   }
3694
3695   public void setPHPMode(boolean mode) {
3696     phpMode = mode;
3697   }
3698
3699   public final void setSource(char[] source) {
3700     setSource(null, source);
3701   }
3702
3703   public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3704     //the source-buffer is set to sourceString
3705     this.compilationUnit = compilationUnit;
3706     if (source == null) {
3707       this.source = new char[0];
3708     } else {
3709       this.source = source;
3710     }
3711     startPosition = -1;
3712     initialPosition = currentPosition = 0;
3713     containsAssertKeyword = false;
3714     withoutUnicodeBuffer = new char[this.source.length];
3715 //    encapsedStringStack = new Stack();
3716   }
3717
3718   public String toString() {
3719     if (startPosition == source.length)
3720       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3721     if (currentPosition > source.length)
3722       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3723     char front[] = new char[startPosition];
3724     System.arraycopy(source, 0, front, 0, startPosition);
3725     int middleLength = (currentPosition - 1) - startPosition + 1;
3726     char middle[];
3727     if (middleLength > -1) {
3728       middle = new char[middleLength];
3729       System.arraycopy(source, startPosition, middle, 0, middleLength);
3730     } else {
3731       middle = new char[0];
3732     }
3733     char end[] = new char[source.length - (currentPosition - 1)];
3734     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3735     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3736         + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3737         + new String(end);
3738   }
3739
3740   public final String toStringAction(int act) {
3741     switch (act) {
3742     case TokenNameERROR:
3743       return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3744     // //$NON-NLS-1$
3745     case TokenNameINLINE_HTML:
3746       return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3747     case TokenNameIdentifier:
3748       return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3749     case TokenNameVariable:
3750       return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3751     case TokenNameabstract:
3752       return "abstract"; //$NON-NLS-1$
3753     case TokenNameand:
3754       return "AND"; //$NON-NLS-1$
3755     case TokenNamearray:
3756       return "array"; //$NON-NLS-1$
3757     case TokenNameas:
3758       return "as"; //$NON-NLS-1$
3759     case TokenNamebreak:
3760       return "break"; //$NON-NLS-1$
3761     case TokenNamecase:
3762       return "case"; //$NON-NLS-1$
3763     case TokenNameclass:
3764       return "class"; //$NON-NLS-1$
3765     case TokenNamecatch:
3766       return "catch"; //$NON-NLS-1$
3767     case TokenNameclone:
3768       //$NON-NLS-1$
3769       return "clone";
3770     case TokenNameconst:
3771       //$NON-NLS-1$
3772       return "const";
3773     case TokenNamecontinue:
3774       return "continue"; //$NON-NLS-1$
3775     case TokenNamedefault:
3776       return "default"; //$NON-NLS-1$
3777     //      case TokenNamedefine :
3778     //        return "define"; //$NON-NLS-1$
3779     case TokenNamedo:
3780       return "do"; //$NON-NLS-1$
3781     case TokenNameecho:
3782       return "echo"; //$NON-NLS-1$
3783     case TokenNameelse:
3784       return "else"; //$NON-NLS-1$
3785     case TokenNameelseif:
3786       return "elseif"; //$NON-NLS-1$
3787     case TokenNameendfor:
3788       return "endfor"; //$NON-NLS-1$
3789     case TokenNameendforeach:
3790       return "endforeach"; //$NON-NLS-1$
3791     case TokenNameendif:
3792       return "endif"; //$NON-NLS-1$
3793     case TokenNameendswitch:
3794       return "endswitch"; //$NON-NLS-1$
3795     case TokenNameendwhile:
3796       return "endwhile"; //$NON-NLS-1$
3797     case TokenNameexit:
3798       return "exit";
3799     case TokenNameextends:
3800       return "extends"; //$NON-NLS-1$
3801     //      case TokenNamefalse :
3802     //        return "false"; //$NON-NLS-1$
3803     case TokenNamefinal:
3804       return "final"; //$NON-NLS-1$
3805     case TokenNamefor:
3806       return "for"; //$NON-NLS-1$
3807     case TokenNameforeach:
3808       return "foreach"; //$NON-NLS-1$
3809     case TokenNamefunction:
3810       return "function"; //$NON-NLS-1$
3811     case TokenNameglobal:
3812       return "global"; //$NON-NLS-1$
3813     case TokenNameif:
3814       return "if"; //$NON-NLS-1$
3815     case TokenNameimplements:
3816       return "implements"; //$NON-NLS-1$
3817     case TokenNameinclude:
3818       return "include"; //$NON-NLS-1$
3819     case TokenNameinclude_once:
3820       return "include_once"; //$NON-NLS-1$
3821     case TokenNameinstanceof:
3822       return "instanceof"; //$NON-NLS-1$
3823     case TokenNameinterface:
3824       return "interface"; //$NON-NLS-1$
3825     case TokenNameisset:
3826       return "isset"; //$NON-NLS-1$
3827     case TokenNamelist:
3828       return "list"; //$NON-NLS-1$
3829     case TokenNamenew:
3830       return "new"; //$NON-NLS-1$
3831     //      case TokenNamenull :
3832     //        return "null"; //$NON-NLS-1$
3833     case TokenNameor:
3834       return "OR"; //$NON-NLS-1$
3835     case TokenNameprint:
3836       return "print"; //$NON-NLS-1$
3837     case TokenNameprivate:
3838       return "private"; //$NON-NLS-1$
3839     case TokenNameprotected:
3840       return "protected"; //$NON-NLS-1$
3841     case TokenNamepublic:
3842       return "public"; //$NON-NLS-1$
3843     case TokenNamerequire:
3844       return "require"; //$NON-NLS-1$
3845     case TokenNamerequire_once:
3846       return "require_once"; //$NON-NLS-1$
3847     case TokenNamereturn:
3848       return "return"; //$NON-NLS-1$
3849     case TokenNamestatic:
3850       return "static"; //$NON-NLS-1$
3851     case TokenNameswitch:
3852       return "switch"; //$NON-NLS-1$
3853     //      case TokenNametrue :
3854     //        return "true"; //$NON-NLS-1$
3855     case TokenNameunset:
3856       return "unset"; //$NON-NLS-1$
3857     case TokenNamevar:
3858       return "var"; //$NON-NLS-1$
3859     case TokenNamewhile:
3860       return "while"; //$NON-NLS-1$
3861     case TokenNamexor:
3862       return "XOR"; //$NON-NLS-1$
3863     //      case TokenNamethis :
3864     //        return "$this"; //$NON-NLS-1$
3865     case TokenNameIntegerLiteral:
3866       return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3867     case TokenNameDoubleLiteral:
3868       return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3869     case TokenNameStringDoubleQuote:
3870       return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3871     case TokenNameStringSingleQuote:
3872       return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3873     case TokenNameStringInterpolated:
3874       return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3875     case TokenNameEncapsedString0:
3876       return "`"; //$NON-NLS-1$
3877 //    case TokenNameEncapsedString1:
3878 //      return "\'"; //$NON-NLS-1$
3879 //    case TokenNameEncapsedString2:
3880 //      return "\""; //$NON-NLS-1$
3881     case TokenNameSTRING:
3882       return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3883     case TokenNameHEREDOC:
3884       return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3885     case TokenNamePLUS_PLUS:
3886       return "++"; //$NON-NLS-1$
3887     case TokenNameMINUS_MINUS:
3888       return "--"; //$NON-NLS-1$
3889     case TokenNameEQUAL_EQUAL:
3890       return "=="; //$NON-NLS-1$
3891     case TokenNameEQUAL_EQUAL_EQUAL:
3892       return "==="; //$NON-NLS-1$
3893     case TokenNameEQUAL_GREATER:
3894       return "=>"; //$NON-NLS-1$
3895     case TokenNameLESS_EQUAL:
3896       return "<="; //$NON-NLS-1$
3897     case TokenNameGREATER_EQUAL:
3898       return ">="; //$NON-NLS-1$
3899     case TokenNameNOT_EQUAL:
3900       return "!="; //$NON-NLS-1$
3901     case TokenNameNOT_EQUAL_EQUAL:
3902       return "!=="; //$NON-NLS-1$
3903     case TokenNameLEFT_SHIFT:
3904       return "<<"; //$NON-NLS-1$
3905     case TokenNameRIGHT_SHIFT:
3906       return ">>"; //$NON-NLS-1$
3907     case TokenNamePLUS_EQUAL:
3908       return "+="; //$NON-NLS-1$
3909     case TokenNameMINUS_EQUAL:
3910       return "-="; //$NON-NLS-1$
3911     case TokenNameMULTIPLY_EQUAL:
3912       return "*="; //$NON-NLS-1$
3913     case TokenNameDIVIDE_EQUAL:
3914       return "/="; //$NON-NLS-1$
3915     case TokenNameAND_EQUAL:
3916       return "&="; //$NON-NLS-1$
3917     case TokenNameOR_EQUAL:
3918       return "|="; //$NON-NLS-1$
3919     case TokenNameXOR_EQUAL:
3920       return "^="; //$NON-NLS-1$
3921     case TokenNameREMAINDER_EQUAL:
3922       return "%="; //$NON-NLS-1$
3923     case TokenNameDOT_EQUAL:
3924       return ".="; //$NON-NLS-1$
3925     case TokenNameLEFT_SHIFT_EQUAL:
3926       return "<<="; //$NON-NLS-1$
3927     case TokenNameRIGHT_SHIFT_EQUAL:
3928       return ">>="; //$NON-NLS-1$
3929     case TokenNameOR_OR:
3930       return "||"; //$NON-NLS-1$
3931     case TokenNameAND_AND:
3932       return "&&"; //$NON-NLS-1$
3933     case TokenNamePLUS:
3934       return "+"; //$NON-NLS-1$
3935     case TokenNameMINUS:
3936       return "-"; //$NON-NLS-1$
3937     case TokenNameMINUS_GREATER:
3938       return "->";
3939     case TokenNameNOT:
3940       return "!"; //$NON-NLS-1$
3941     case TokenNameREMAINDER:
3942       return "%"; //$NON-NLS-1$
3943     case TokenNameXOR:
3944       return "^"; //$NON-NLS-1$
3945     case TokenNameAND:
3946       return "&"; //$NON-NLS-1$
3947     case TokenNameMULTIPLY:
3948       return "*"; //$NON-NLS-1$
3949     case TokenNameOR:
3950       return "|"; //$NON-NLS-1$
3951     case TokenNameTWIDDLE:
3952       return "~"; //$NON-NLS-1$
3953     case TokenNameTWIDDLE_EQUAL:
3954       return "~="; //$NON-NLS-1$
3955     case TokenNameDIVIDE:
3956       return "/"; //$NON-NLS-1$
3957     case TokenNameGREATER:
3958       return ">"; //$NON-NLS-1$
3959     case TokenNameLESS:
3960       return "<"; //$NON-NLS-1$
3961     case TokenNameLPAREN:
3962       return "("; //$NON-NLS-1$
3963     case TokenNameRPAREN:
3964       return ")"; //$NON-NLS-1$
3965     case TokenNameLBRACE:
3966       return "{"; //$NON-NLS-1$
3967     case TokenNameRBRACE:
3968       return "}"; //$NON-NLS-1$
3969     case TokenNameLBRACKET:
3970       return "["; //$NON-NLS-1$
3971     case TokenNameRBRACKET:
3972       return "]"; //$NON-NLS-1$
3973     case TokenNameSEMICOLON:
3974       return ";"; //$NON-NLS-1$
3975     case TokenNameQUESTION:
3976       return "?"; //$NON-NLS-1$
3977     case TokenNameCOLON:
3978       return ":"; //$NON-NLS-1$
3979     case TokenNameCOMMA:
3980       return ","; //$NON-NLS-1$
3981     case TokenNameDOT:
3982       return "."; //$NON-NLS-1$
3983     case TokenNameEQUAL:
3984       return "="; //$NON-NLS-1$
3985     case TokenNameAT:
3986       return "@";
3987     case TokenNameDOLLAR:
3988       return "$";
3989     case TokenNameDOLLAR_LBRACE:
3990       return "${";
3991     case TokenNameLBRACE_DOLLAR:
3992       return "{$";
3993     case TokenNameEOF:
3994       return "EOF"; //$NON-NLS-1$
3995     case TokenNameWHITESPACE:
3996       return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3997     case TokenNameCOMMENT_LINE:
3998       return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3999     case TokenNameCOMMENT_BLOCK:
4000       return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4001     case TokenNameCOMMENT_PHPDOC:
4002       return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4003     //      case TokenNameHTML :
4004     //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
4005     // //$NON-NLS-1$
4006     case TokenNameFILE:
4007       return "__FILE__"; //$NON-NLS-1$
4008     case TokenNameLINE:
4009       return "__LINE__"; //$NON-NLS-1$
4010     case TokenNameCLASS_C:
4011       return "__CLASS__"; //$NON-NLS-1$
4012     case TokenNameMETHOD_C:
4013       return "__METHOD__"; //$NON-NLS-1$
4014     case TokenNameFUNC_C:
4015       return "__FUNCTION__"; //$NON-NLS-1
4016     case TokenNameboolCAST:
4017       return "( bool )"; //$NON-NLS-1$
4018     case TokenNameintCAST:
4019       return "( int )"; //$NON-NLS-1$
4020     case TokenNamedoubleCAST:
4021       return "( double )"; //$NON-NLS-1$
4022     case TokenNameobjectCAST:
4023       return "( object )"; //$NON-NLS-1$
4024     case TokenNamestringCAST:
4025       return "( string )"; //$NON-NLS-1$
4026     default:
4027       return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4028     }
4029   }
4030
4031   public Scanner() {
4032     this(false, false);
4033   }
4034
4035   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4036     this(tokenizeComments, tokenizeWhiteSpace, false);
4037   }
4038
4039   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4040     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
4041   }
4042
4043   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4044       boolean assertMode) {
4045     this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
4046   }
4047
4048   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4049       boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4050     this.eofPosition = Integer.MAX_VALUE;
4051     this.tokenizeComments = tokenizeComments;
4052     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4053     this.tokenizeStrings = tokenizeStrings;
4054     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4055     this.assertMode = assertMode;
4056 //    this.encapsedStringStack = null;
4057     this.taskTags = taskTags;
4058     this.taskPriorities = taskPriorities;
4059   }
4060
4061   private void checkNonExternalizeString() throws InvalidInputException {
4062     if (currentLine == null)
4063       return;
4064     parseTags(currentLine);
4065   }
4066
4067   private void parseTags(NLSLine line) throws InvalidInputException {
4068     String s = new String(getCurrentTokenSource());
4069     int pos = s.indexOf(TAG_PREFIX);
4070     int lineLength = line.size();
4071     while (pos != -1) {
4072       int start = pos + TAG_PREFIX_LENGTH;
4073       int end = s.indexOf(TAG_POSTFIX, start);
4074       String index = s.substring(start, end);
4075       int i = 0;
4076       try {
4077         i = Integer.parseInt(index) - 1;
4078         // Tags are one based not zero based.
4079       } catch (NumberFormatException e) {
4080         i = -1; // we don't want to consider this as a valid NLS tag
4081       }
4082       if (line.exists(i)) {
4083         line.set(i, null);
4084       }
4085       pos = s.indexOf(TAG_PREFIX, start);
4086     }
4087     this.nonNLSStrings = new StringLiteral[lineLength];
4088     int nonNLSCounter = 0;
4089     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4090       StringLiteral literal = (StringLiteral) iterator.next();
4091       if (literal != null) {
4092         this.nonNLSStrings[nonNLSCounter++] = literal;
4093       }
4094     }
4095     if (nonNLSCounter == 0) {
4096       this.nonNLSStrings = null;
4097       currentLine = null;
4098       return;
4099     }
4100     this.wasNonExternalizedStringLiteral = true;
4101     if (nonNLSCounter != lineLength) {
4102       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4103     }
4104     currentLine = null;
4105   }
4106
4107   public final void scanEscapeCharacter() throws InvalidInputException {
4108     // the string with "\\u" is a legal string of two chars \ and u
4109     //thus we use a direct access to the source (for regular cases).
4110     if (unicodeAsBackSlash) {
4111       // consume next character
4112       unicodeAsBackSlash = false;
4113       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
4114       // (source[currentPosition] == 'u')) {
4115       //                                getNextUnicodeChar();
4116       //                        } else {
4117       if (withoutUnicodePtr != 0) {
4118         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4119         //                              }
4120       }
4121     } else
4122       currentCharacter = source[currentPosition++];
4123     switch (currentCharacter) {
4124     case 'b':
4125       currentCharacter = '\b';
4126       break;
4127     case 't':
4128       currentCharacter = '\t';
4129       break;
4130     case 'n':
4131       currentCharacter = '\n';
4132       break;
4133     case 'f':
4134       currentCharacter = '\f';
4135       break;
4136     case 'r':
4137       currentCharacter = '\r';
4138       break;
4139     case '\"':
4140       currentCharacter = '\"';
4141       break;
4142     case '\'':
4143       currentCharacter = '\'';
4144       break;
4145     case '\\':
4146       currentCharacter = '\\';
4147       break;
4148     default:
4149       // -----------octal escape--------------
4150       // OctalDigit
4151       // OctalDigit OctalDigit
4152       // ZeroToThree OctalDigit OctalDigit
4153       int number = Character.getNumericValue(currentCharacter);
4154       if (number >= 0 && number <= 7) {
4155         boolean zeroToThreeNot = number > 3;
4156         if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4157           int digit = Character.getNumericValue(currentCharacter);
4158           if (digit >= 0 && digit <= 7) {
4159             number = (number * 8) + digit;
4160             if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4161               if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4162                 // Digit --> ignore last character
4163                 currentPosition--;
4164               } else {
4165                 digit = Character.getNumericValue(currentCharacter);
4166                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4167                   // OctalDigit OctalDigit
4168                   number = (number * 8) + digit;
4169                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4170                   // --> ignore last character
4171                   currentPosition--;
4172                 }
4173               }
4174             } else { // has read \OctalDigit NonDigit--> ignore last
4175               // character
4176               currentPosition--;
4177             }
4178           } else { // has read \OctalDigit NonOctalDigit--> ignore last
4179             // character
4180             currentPosition--;
4181           }
4182         } else { // has read \OctalDigit --> ignore last character
4183           currentPosition--;
4184         }
4185         if (number > 255)
4186           throw new InvalidInputException(INVALID_ESCAPE);
4187         currentCharacter = (char) number;
4188       } else
4189         throw new InvalidInputException(INVALID_ESCAPE);
4190     }
4191   }
4192
4193   //chech presence of task: tags
4194   //TODO (frederic) see if we need to take unicode characters into account...
4195   public void checkTaskTag(int commentStart, int commentEnd) {
4196     char[] src = this.source;
4197
4198     // only look for newer task: tags
4199     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4200       return;
4201     }
4202     int foundTaskIndex = this.foundTaskCount;
4203     char previous = src[commentStart + 1]; // should be '*' or '/'
4204     nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4205       char[] tag = null;
4206       char[] priority = null;
4207       // check for tag occurrence only if not ambiguous with javadoc tag
4208       if (previous != '@') {
4209         nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4210           tag = this.taskTags[itag];
4211           int tagLength = tag.length;
4212           if (tagLength == 0)
4213             continue nextTag;
4214
4215           // ensure tag is not leaded with letter if tag starts with a letter
4216           if (Scanner.isPHPIdentifierStart(tag[0])) {
4217             if (Scanner.isPHPIdentifierPart(previous)) {
4218               continue nextTag;
4219             }
4220           }
4221
4222           for (int t = 0; t < tagLength; t++) {
4223             char sc, tc;
4224             int x = i + t;
4225             if (x >= this.eofPosition || x >= commentEnd)
4226               continue nextTag;
4227             if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4228               if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4229                 continue nextTag;
4230               }
4231             }
4232           }
4233           // ensure tag is not followed with letter if tag finishes with a letter
4234           if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4235             if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4236               continue nextTag;
4237           }
4238           if (this.foundTaskTags == null) {
4239             this.foundTaskTags = new char[5][];
4240             this.foundTaskMessages = new char[5][];
4241             this.foundTaskPriorities = new char[5][];
4242             this.foundTaskPositions = new int[5][];
4243           } else if (this.foundTaskCount == this.foundTaskTags.length) {
4244             System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4245                 this.foundTaskCount);
4246             System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4247                 this.foundTaskCount);
4248             System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4249                 this.foundTaskCount);
4250             System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4251                 this.foundTaskCount);
4252           }
4253
4254           priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4255
4256           this.foundTaskTags[this.foundTaskCount] = tag;
4257           this.foundTaskPriorities[this.foundTaskCount] = priority;
4258           this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4259           this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4260           this.foundTaskCount++;
4261           i += tagLength - 1; // will be incremented when looping
4262           break nextTag;
4263         }
4264       }
4265       previous = src[i];
4266     }
4267     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4268       // retrieve message start and end positions
4269       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4270       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4271       // at most beginning of next task
4272       if (max_value < msgStart) {
4273         max_value = msgStart; // would only occur if tag is before EOF.
4274       }
4275       int end = -1;
4276       char c;
4277       for (int j = msgStart; j < max_value; j++) {
4278         if ((c = src[j]) == '\n' || c == '\r') {
4279           end = j - 1;
4280           break;
4281         }
4282       }
4283       if (end == -1) {
4284         for (int j = max_value; j > msgStart; j--) {
4285           if ((c = src[j]) == '*') {
4286             end = j - 1;
4287             break;
4288           }
4289         }
4290         if (end == -1)
4291           end = max_value;
4292       }
4293       if (msgStart == end)
4294         continue; // empty
4295       // trim the message
4296       while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4297         end--;
4298       while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4299         msgStart++;
4300       // update the end position of the task
4301       this.foundTaskPositions[i][1] = end;
4302       // get the message source
4303       final int messageLength = end - msgStart + 1;
4304       char[] message = new char[messageLength];
4305       System.arraycopy(src, msgStart, message, 0, messageLength);
4306       this.foundTaskMessages[i] = message;
4307     }
4308   }
4309
4310   // check presence of task: tags
4311   //  public void checkTaskTag(int commentStart, int commentEnd) {
4312   //    // only look for newer task: tags
4313   //    if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4314   //      return;
4315   //    }
4316   //    int foundTaskIndex = this.foundTaskCount;
4317   //    nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4318   //      char[] tag = null;
4319   //      char[] priority = null;
4320   //      // check for tag occurrence
4321   //      nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4322   //        tag = this.taskTags[itag];
4323   //        priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4324   //        int tagLength = tag.length;
4325   //        for (int t = 0; t < tagLength; t++) {
4326   //          if (this.source[i + t] != tag[t])
4327   //            continue nextTag;
4328   //        }
4329   //        if (this.foundTaskTags == null) {
4330   //          this.foundTaskTags = new char[5][];
4331   //          this.foundTaskMessages = new char[5][];
4332   //          this.foundTaskPriorities = new char[5][];
4333   //          this.foundTaskPositions = new int[5][];
4334   //        } else if (this.foundTaskCount == this.foundTaskTags.length) {
4335   //          System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4336   //          System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4337   //              this.foundTaskCount);
4338   //          System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4339   //              this.foundTaskCount);
4340   //          System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4341   //              this.foundTaskCount);
4342   //        }
4343   //        this.foundTaskTags[this.foundTaskCount] = tag;
4344   //        this.foundTaskPriorities[this.foundTaskCount] = priority;
4345   //        this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4346   //        this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4347   //        this.foundTaskCount++;
4348   //        i += tagLength - 1; // will be incremented when looping
4349   //      }
4350   //    }
4351   //    for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4352   //      // retrieve message start and end positions
4353   //      int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4354   //      int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4355   //      // at most beginning of next task
4356   //      if (max_value < msgStart)
4357   //        max_value = msgStart; // would only occur if tag is before EOF.
4358   //      int end = -1;
4359   //      char c;
4360   //      for (int j = msgStart; j < max_value; j++) {
4361   //        if ((c = this.source[j]) == '\n' || c == '\r') {
4362   //          end = j - 1;
4363   //          break;
4364   //        }
4365   //      }
4366   //      if (end == -1) {
4367   //        for (int j = max_value; j > msgStart; j--) {
4368   //          if ((c = this.source[j]) == '*') {
4369   //            end = j - 1;
4370   //            break;
4371   //          }
4372   //        }
4373   //        if (end == -1)
4374   //          end = max_value;
4375   //      }
4376   //      if (msgStart == end)
4377   //        continue; // empty
4378   //      // trim the message
4379   //      while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4380   //        end--;
4381   //      while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4382   //        msgStart++;
4383   //      // update the end position of the task
4384   //      this.foundTaskPositions[i][1] = end;
4385   //      // get the message source
4386   //      final int messageLength = end - msgStart + 1;
4387   //      char[] message = new char[messageLength];
4388   //      System.arraycopy(source, msgStart, message, 0, messageLength);
4389   //      this.foundTaskMessages[i] = message;
4390   //    }
4391   //  }
4392 }