net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java

   1 /***********************************************************************************************************************************
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
   3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
   4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
   5  *
   6  * Contributors: IBM Corporation - initial API and implementation
   7  **********************************************************************************************************************************/
   8 package net.sourceforge.phpdt.internal.compiler.parser;
   9
  10 import java.util.ArrayList;
  11 import java.util.Iterator;
  12 import java.util.List;
  13
  14 import net.sourceforge.phpdt.core.compiler.CharOperation;
  15 import net.sourceforge.phpdt.core.compiler.IScanner;
  16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
  17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
  18 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
  19 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
  20
  21 public class Scanner implements IScanner, ITerminalSymbols {
  22         /*
   23          * APIs are - getNextToken() which returns the current type of the token (this
  24          * value is not memorized by the scanner) - getCurrentTokenSource() which
  25          * provides with the token "REAL" source (aka all unicode have been
  26          * transformed into a correct char) - sourceStart gives the position into the
  27          * stream - currentPosition-1 gives the sourceEnd position into the stream
  28          */
  29         // 1.4 feature
  30 //      private boolean assertMode;
  31
  32         public boolean useAssertAsAnIndentifier = false;
  33
  34         // flag indicating if processed source contains occurrences of keyword assert
  35         public boolean containsAssertKeyword = false;
  36
  37         public boolean recordLineSeparator;
  38
  39         public boolean ignorePHPOneLiner = false;
  40
  41         public boolean phpMode = false;
  42
  43         public boolean phpExpressionTag = false;
  44
  45         // public Stack encapsedStringStack = null;
  46
  47         public char currentCharacter;
  48
  49         public int startPosition;
  50
  51         public int currentPosition;
  52
  53         public int initialPosition, eofPosition;
  54
   55         // (applies to eofPosition above) after this position eof is generated
   56         // instead of real tokens from the source
  57         public boolean tokenizeComments;
  58
  59         public boolean tokenizeWhiteSpace;
  60
  61         public boolean tokenizeStrings;
  62
  63         // source should be viewed as a window (aka a part)
  64         // of a entire very large stream
  65         public char source[];
  66
  67         // unicode support
  68         public char[] withoutUnicodeBuffer;
  69
  70         public int withoutUnicodePtr;
  71
   72         // (applies to withoutUnicodePtr above) when == 0 ==> no unicode in the current token
  73         public boolean unicodeAsBackSlash = false;
  74
  75         public boolean scanningFloatLiteral = false;
  76
  77         // support for /** comments
  78         public int[] commentStops = new int[10];
  79
  80         public int[] commentStarts = new int[10];
  81
  82         public int commentPtr = -1; // no comment test with commentPtr value -1
  83
  84         protected int lastCommentLinePosition = -1;
  85
  86         // diet parsing support - jump over some method body when requested
  87         public boolean diet = false;
  88
  89         // support for the poor-line-debuggers ....
  90         // remember the position of the cr/lf
  91         public int[] lineEnds = new int[250];
  92
  93         public int linePtr = -1;
  94
  95         public boolean wasAcr = false;
  96
  97         public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
  98
  99         public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
 100
 101         public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
 102
 103         public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 104
 105         public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 106
 107         public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 108
 109         public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 110
 111         public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 112
 113         public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 114
 115         public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 116
 117         public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 118
 119         public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 120
  121         // ----------------optimized identifier management------------------
 122         static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
 123                         charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
 124                         charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
 125                         charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
 126                         charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
 127                         charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
 128                         charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
 129                         charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
 130                         charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
 131
 132         static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
 133                         '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
 134                         'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
 135                         charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
 136                         charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
 137                         charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
 138                         charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
 139                         charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
 140                         charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
 141
 142         public final static int MAX_OBVIOUS = 256;
 143
 144         static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
 145
 146         public final static int C_DOLLAR = 8;
 147
 148         public final static int C_LETTER = 4;
 149
 150         public final static int C_DIGIT = 3;
 151
 152         public final static int C_SEPARATOR = 2;
 153
 154         public final static int C_SPACE = 1;
 155         static {
 156                 for (int i = '0'; i <= '9'; i++)
 157                         ObviousIdentCharNatures[i] = C_DIGIT;
 158
 159                 for (int i = 'a'; i <= 'z'; i++)
 160                         ObviousIdentCharNatures[i] = C_LETTER;
 161                 for (int i = 'A'; i <= 'Z'; i++)
 162                         ObviousIdentCharNatures[i] = C_LETTER;
 163                 ObviousIdentCharNatures['_'] = C_LETTER;
 164                 for (int i = 127; i <= 255; i++)
 165                         ObviousIdentCharNatures[i] = C_LETTER;
 166
 167                 ObviousIdentCharNatures['$'] = C_DOLLAR;
 168
 169                 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
 170                 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
 171                 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
 172                 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
 173                 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
 174
 175                 ObviousIdentCharNatures['.'] = C_SEPARATOR;
 176                 ObviousIdentCharNatures[':'] = C_SEPARATOR;
 177                 ObviousIdentCharNatures[';'] = C_SEPARATOR;
 178                 ObviousIdentCharNatures[','] = C_SEPARATOR;
 179                 ObviousIdentCharNatures['['] = C_SEPARATOR;
 180                 ObviousIdentCharNatures[']'] = C_SEPARATOR;
 181                 ObviousIdentCharNatures['('] = C_SEPARATOR;
 182                 ObviousIdentCharNatures[')'] = C_SEPARATOR;
 183                 ObviousIdentCharNatures['{'] = C_SEPARATOR;
 184                 ObviousIdentCharNatures['}'] = C_SEPARATOR;
 185                 ObviousIdentCharNatures['+'] = C_SEPARATOR;
 186                 ObviousIdentCharNatures['-'] = C_SEPARATOR;
 187                 ObviousIdentCharNatures['*'] = C_SEPARATOR;
 188                 ObviousIdentCharNatures['/'] = C_SEPARATOR;
 189                 ObviousIdentCharNatures['='] = C_SEPARATOR;
 190                 ObviousIdentCharNatures['&'] = C_SEPARATOR;
 191                 ObviousIdentCharNatures['|'] = C_SEPARATOR;
 192                 ObviousIdentCharNatures['?'] = C_SEPARATOR;
 193                 ObviousIdentCharNatures['<'] = C_SEPARATOR;
 194                 ObviousIdentCharNatures['>'] = C_SEPARATOR;
 195                 ObviousIdentCharNatures['!'] = C_SEPARATOR;
 196                 ObviousIdentCharNatures['%'] = C_SEPARATOR;
 197                 ObviousIdentCharNatures['^'] = C_SEPARATOR;
 198                 ObviousIdentCharNatures['~'] = C_SEPARATOR;
 199                 ObviousIdentCharNatures['"'] = C_SEPARATOR;
 200                 ObviousIdentCharNatures['\''] = C_SEPARATOR;
 201         }
 202         static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 203
 204         static final int TableSize = 30, InternalTableSize = 6;
 205
 206         // 30*6 = 180 entries
 207         public static final int OptimizedLength = 6;
 208
 209         public/* static */
 210         final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
 211
 212         // support for detecting non-externalized string literals
 213         int currentLineNr = -1;
 214
 215         int previousLineNr = -1;
 216
 217         NLSLine currentLine = null;
 218
 219         List lines = new ArrayList();
 220
 221         public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 222
 223         public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 224
 225         public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 226
 227         public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 228
 229         public StringLiteral[] nonNLSStrings = null;
 230
 231         public boolean checkNonExternalizedStringLiterals = true;
 232
 233         public boolean wasNonExternalizedStringLiteral = false;
 234
 235         /* static */{
 236                 for (int i = 0; i < 6; i++) {
 237                         for (int j = 0; j < TableSize; j++) {
 238                                 for (int k = 0; k < InternalTableSize; k++) {
 239                                         charArray_length[i][j][k] = initCharArray;
 240                                 }
 241                         }
 242                 }
 243         }
 244
 245         static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
 246
 247         public static final int RoundBracket = 0;
 248
 249         public static final int SquareBracket = 1;
 250
 251         public static final int CurlyBracket = 2;
 252
 253         public static final int BracketKinds = 3;
 254
 255         // task tag support
 256         public char[][] foundTaskTags = null;
 257
 258         public char[][] foundTaskMessages;
 259
 260         public char[][] foundTaskPriorities = null;
 261
 262         public int[][] foundTaskPositions;
 263
 264         public int foundTaskCount = 0;
 265
 266         public char[][] taskTags = null;
 267
 268         public char[][] taskPriorities = null;
 269
 270         public boolean isTaskCaseSensitive = true;
 271
 272         public static final boolean DEBUG = false;
 273
 274         public static final boolean TRACE = false;
 275
 276         public ICompilationUnit compilationUnit = null;
 277
 278         /**
 279          * Determines if the specified character is permissible as the first character
 280          * in a PHP identifier or variable
 281          *
 282          * The '$' character for PHP variables is regarded as a correct first
 283          * character !
 284          *
 285          */
 286         public static boolean isPHPIdentOrVarStart(char ch) {
 287                 if (ch < MAX_OBVIOUS) {
 288                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
 289                 }
 290                 return false;
 291                 //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 292         }
 293
 294         /**
 295          * Determines if the specified character is permissible as the first character
 296          * in a PHP identifier.
 297          *
 298          * The '$' character for PHP variables isn't regarded as the first character !
 299          */
 300         public static boolean isPHPIdentifierStart(char ch) {
 301                 if (ch < MAX_OBVIOUS) {
 302                         return ObviousIdentCharNatures[ch]==C_LETTER;
 303                 }
 304                 return false;
 305 //              return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 306         }
 307
 308         /**
 309          * Determines if the specified character may be part of a PHP identifier as
 310          * other than the first character
 311          */
 312         public static boolean isPHPIdentifierPart(char ch) {
 313                 if (ch < MAX_OBVIOUS) {
 314                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
 315                 }
 316                 return false;
 317 //              return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
 318         }
 319
 320         public static boolean isSQLIdentifierPart(char ch) {
 321                 if (ch < MAX_OBVIOUS) {
 322                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
 323                 }
 324                 return false;
 325         }
 326
 327         public final boolean atEnd() {
 328                 // This code is not relevant if source is
 329                 // Only a part of the real stream input
 330                 return source.length == currentPosition;
 331         }
 332
 333         public char[] getCurrentIdentifierSource() {
 334                 // return the token REAL source (aka unicodes are precomputed)
 335                 char[] result;
 336                 // if (withoutUnicodePtr != 0)
 337                 // //0 is used as a fast test flag so the real first char is in position 1
 338                 // System.arraycopy(
 339                 // withoutUnicodeBuffer,
 340                 // 1,
 341                 // result = new char[withoutUnicodePtr],
 342                 // 0,
 343                 // withoutUnicodePtr);
 344                 // else {
 345                 int length = currentPosition - startPosition;
 346                 switch (length) { // see OptimizedLength
 347                 case 1:
 348                         return optimizedCurrentTokenSource1();
 349                 case 2:
 350                         return optimizedCurrentTokenSource2();
 351                 case 3:
 352                         return optimizedCurrentTokenSource3();
 353                 case 4:
 354                         return optimizedCurrentTokenSource4();
 355                 case 5:
 356                         return optimizedCurrentTokenSource5();
 357                 case 6:
 358                         return optimizedCurrentTokenSource6();
 359                 }
 360                 // no optimization
 361                 System.arraycopy(source, startPosition, result = new char[length], 0, length);
 362                 // }
 363                 return result;
 364         }
 365
 366         public int getCurrentTokenEndPosition() {
 367                 return this.currentPosition - 1;
 368         }
 369
 370         public final char[] getCurrentTokenSource() {
 371                 // Return the token REAL source (aka unicodes are precomputed)
 372                 char[] result;
 373                 // if (withoutUnicodePtr != 0)
 374                 // // 0 is used as a fast test flag so the real first char is in position 1
 375                 // System.arraycopy(
 376                 // withoutUnicodeBuffer,
 377                 // 1,
 378                 // result = new char[withoutUnicodePtr],
 379                 // 0,
 380                 // withoutUnicodePtr);
 381                 // else {
 382                 int length;
 383                 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
 384                 // }
 385                 return result;
 386         }
 387
 388         public final char[] getCurrentTokenSource(int startPos) {
 389                 // Return the token REAL source (aka unicodes are precomputed)
 390                 char[] result;
 391                 // if (withoutUnicodePtr != 0)
 392                 // // 0 is used as a fast test flag so the real first char is in position 1
 393                 // System.arraycopy(
 394                 // withoutUnicodeBuffer,
 395                 // 1,
 396                 // result = new char[withoutUnicodePtr],
 397                 // 0,
 398                 // withoutUnicodePtr);
 399                 // else {
 400                 int length;
 401                 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
 402                 // }
 403                 return result;
 404         }
 405
 406         public final char[] getCurrentTokenSourceString() {
 407                 // return the token REAL source (aka unicodes are precomputed).
 408                 // REMOVE the two " that are at the beginning and the end.
 409                 char[] result;
 410                 if (withoutUnicodePtr != 0)
 411                         // 0 is used as a fast test flag so the real first char is in position 1
 412                         System.arraycopy(withoutUnicodeBuffer, 2,
 413                         // 2 is 1 (real start) + 1 (to jump over the ")
 414                                         result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 415                 else {
 416                         int length;
 417                         System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 418                 }
 419                 return result;
 420         }
 421
 422         public final char[] getRawTokenSourceEnd() {
 423                 int length = this.eofPosition - this.currentPosition - 1;
 424                 char[] sourceEnd = new char[length];
 425                 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
 426                 return sourceEnd;
 427         }
 428
 429         public int getCurrentTokenStartPosition() {
 430                 return this.startPosition;
 431         }
 432
 433         public final char[] getCurrentStringLiteralSource() {
 434                 // Return the token REAL source (aka unicodes are precomputed)
 435                 if (startPosition + 1 >= currentPosition) {
 436                         return new char[0];
 437                 }
 438                 char[] result;
 439                 int length;
 440                 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
 441                 // }
 442                 return result;
 443         }
 444
 445         public final char[] getCurrentStringLiteralSource(int startPos) {
 446                 // Return the token REAL source (aka unicodes are precomputed)
 447                 char[] result;
 448                 int length;
 449                 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
 450                 // }
 451                 return result;
 452         }
 453
 454         /*
 455          * Search the source position corresponding to the end of a given line number
 456          *
 457          * Line numbers are 1-based, and relative to the scanner initialPosition.
 458          * Character positions are 0-based.
 459          *
 460          * In case the given line number is inconsistent, answers -1.
 461          */
 462         public final int getLineEnd(int lineNumber) {
 463                 if (lineEnds == null)
 464                         return -1;
 465                 if (lineNumber >= lineEnds.length)
 466                         return -1;
 467                 if (lineNumber <= 0)
 468                         return -1;
 469                 if (lineNumber == lineEnds.length - 1)
 470                         return eofPosition;
 471                 return lineEnds[lineNumber - 1];
 472                 // next line start one character behind the lineEnd of the previous line
 473         }
 474
 475         /**
 476          * Search the source position corresponding to the beginning of a given line
 477          * number
 478          *
 479          * Line numbers are 1-based, and relative to the scanner initialPosition.
 480          * Character positions are 0-based.
 481          *
 482          * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
 483          *
 484          * In case the given line number is inconsistent, answers -1.
 485          */
 486         public final int getLineStart(int lineNumber) {
 487                 if (lineEnds == null)
 488                         return -1;
 489                 if (lineNumber >= lineEnds.length)
 490                         return -1;
 491                 if (lineNumber <= 0)
 492                         return -1;
 493                 if (lineNumber == 1)
 494                         return initialPosition;
 495                 return lineEnds[lineNumber - 2] + 1;
 496                 // next line start one character behind the lineEnd of the previous line
 497         }
 498
 499         public final boolean getNextChar(char testedChar) {
 500                 // BOOLEAN
 501                 // handle the case of unicode.
 502                 // when a unicode appears then we must use a buffer that holds char
 503                 // internal values
 504                 // At the end of this method currentCharacter holds the new visited char
 505                 // and currentPosition points right next after it
 506                 // Both previous lines are true if the currentCharacter is == to the
 507                 // testedChar
 508                 // On false, no side effect has occured.
 509                 // ALL getNextChar.... ARE OPTIMIZED COPIES
 510                 int temp = currentPosition;
 511                 try {
 512                         currentCharacter = source[currentPosition++];
 513                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 514                         // && (source[currentPosition] == 'u')) {
 515                         // //-------------unicode traitement ------------
 516                         // int c1, c2, c3, c4;
 517                         // int unicodeSize = 6;
 518                         // currentPosition++;
 519                         // while (source[currentPosition] == 'u') {
 520                         // currentPosition++;
 521                         // unicodeSize++;
 522                         // }
 523                         //
 524                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 525                         // || c1 < 0)
 526                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 527                         // || c2 < 0)
 528                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 529                         // || c3 < 0)
 530                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 531                         // || c4 < 0)) {
 532                         // currentPosition = temp;
 533                         // return false;
 534                         // }
 535                         //
 536                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 537                         // if (currentCharacter != testedChar) {
 538                         // currentPosition = temp;
 539                         // return false;
 540                         // }
 541                         // unicodeAsBackSlash = currentCharacter == '\\';
 542                         //
 543                         // //need the unicode buffer
 544                         // if (withoutUnicodePtr == 0) {
 545                         // //buffer all the entries that have been left aside....
 546                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 547                         // System.arraycopy(
 548                         // source,
 549                         // startPosition,
 550                         // withoutUnicodeBuffer,
 551                         // 1,
 552                         // withoutUnicodePtr);
 553                         // }
 554                         // //fill the buffer with the char
 555                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 556                         // return true;
 557                         //
 558                         // } //-------------end unicode traitement--------------
 559                         // else {
 560                         if (currentCharacter != testedChar) {
 561                                 currentPosition = temp;
 562                                 return false;
 563                         }
 564                         unicodeAsBackSlash = false;
 565                         // if (withoutUnicodePtr != 0)
 566                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 567                         return true;
 568                         // }
 569                 } catch (IndexOutOfBoundsException e) {
 570                         unicodeAsBackSlash = false;
 571                         currentPosition = temp;
 572                         return false;
 573                 }
 574         }
 575
 576         public final int getNextChar(char testedChar1, char testedChar2) {
 577                 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 578                 // test can be done with (x==0) for the first and (x>0) for the second
 579                 // handle the case of unicode.
 580                 // when a unicode appears then we must use a buffer that holds char
 581                 // internal values
 582                 // At the end of this method currentCharacter holds the new visited char
 583                 // and currentPosition points right next after it
 584                 // Both previous lines are true if the currentCharacter is == to the
 585                 // testedChar1/2
 586                 // On false, no side effect has occured.
 587                 // ALL getNextChar.... ARE OPTIMIZED COPIES
 588                 int temp = currentPosition;
 589                 try {
 590                         int result;
 591                         currentCharacter = source[currentPosition++];
 592                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 593                         // && (source[currentPosition] == 'u')) {
 594                         // //-------------unicode traitement ------------
 595                         // int c1, c2, c3, c4;
 596                         // int unicodeSize = 6;
 597                         // currentPosition++;
 598                         // while (source[currentPosition] == 'u') {
 599                         // currentPosition++;
 600                         // unicodeSize++;
 601                         // }
 602                         //
 603                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 604                         // || c1 < 0)
 605                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 606                         // || c2 < 0)
 607                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 608                         // || c3 < 0)
 609                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 610                         // || c4 < 0)) {
 611                         // currentPosition = temp;
 612                         // return 2;
 613                         // }
 614                         //
 615                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 616                         // if (currentCharacter == testedChar1)
 617                         // result = 0;
 618                         // else if (currentCharacter == testedChar2)
 619                         // result = 1;
 620                         // else {
 621                         // currentPosition = temp;
 622                         // return -1;
 623                         // }
 624                         //
 625                         // //need the unicode buffer
 626                         // if (withoutUnicodePtr == 0) {
 627                         // //buffer all the entries that have been left aside....
 628                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 629                         // System.arraycopy(
 630                         // source,
 631                         // startPosition,
 632                         // withoutUnicodeBuffer,
 633                         // 1,
 634                         // withoutUnicodePtr);
 635                         // }
 636                         // //fill the buffer with the char
 637                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 638                         // return result;
 639                         // } //-------------end unicode traitement--------------
 640                         // else {
 641                         if (currentCharacter == testedChar1)
 642                                 result = 0;
 643                         else if (currentCharacter == testedChar2)
 644                                 result = 1;
 645                         else {
 646                                 currentPosition = temp;
 647                                 return -1;
 648                         }
 649                         // if (withoutUnicodePtr != 0)
 650                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 651                         return result;
 652                         // }
 653                 } catch (IndexOutOfBoundsException e) {
 654                         currentPosition = temp;
 655                         return -1;
 656                 }
 657         }
 658
 659         public final boolean getNextCharAsDigit() {
 660                 // BOOLEAN
 661                 // handle the case of unicode.
 662                 // when a unicode appears then we must use a buffer that holds char
 663                 // internal values
 664                 // At the end of this method currentCharacter holds the new visited char
 665                 // and currentPosition points right next after it
 666                 // Both previous lines are true if the currentCharacter is a digit
 667                 // On false, no side effect has occured.
 668                 // ALL getNextChar.... ARE OPTIMIZED COPIES
 669                 int temp = currentPosition;
 670                 try {
 671                         currentCharacter = source[currentPosition++];
 672                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 673                         // && (source[currentPosition] == 'u')) {
 674                         // //-------------unicode traitement ------------
 675                         // int c1, c2, c3, c4;
 676                         // int unicodeSize = 6;
 677                         // currentPosition++;
 678                         // while (source[currentPosition] == 'u') {
 679                         // currentPosition++;
 680                         // unicodeSize++;
 681                         // }
 682                         //
 683                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 684                         // || c1 < 0)
 685                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 686                         // || c2 < 0)
 687                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 688                         // || c3 < 0)
 689                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 690                         // || c4 < 0)) {
 691                         // currentPosition = temp;
 692                         // return false;
 693                         // }
 694                         //
 695                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 696                         // if (!Character.isDigit(currentCharacter)) {
 697                         // currentPosition = temp;
 698                         // return false;
 699                         // }
 700                         //
 701                         // //need the unicode buffer
 702                         // if (withoutUnicodePtr == 0) {
 703                         // //buffer all the entries that have been left aside....
 704                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 705                         // System.arraycopy(
 706                         // source,
 707                         // startPosition,
 708                         // withoutUnicodeBuffer,
 709                         // 1,
 710                         // withoutUnicodePtr);
 711                         // }
 712                         // //fill the buffer with the char
 713                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 714                         // return true;
 715                         // } //-------------end unicode traitement--------------
 716                         // else {
 717                         if (!Character.isDigit(currentCharacter)) {
 718                                 currentPosition = temp;
 719                                 return false;
 720                         }
 721                         // if (withoutUnicodePtr != 0)
 722                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 723                         return true;
 724                         // }
 725                 } catch (IndexOutOfBoundsException e) {
 726                         currentPosition = temp;
 727                         return false;
 728                 }
 729         }
 730
 731         public final boolean getNextCharAsDigit(int radix) {
 732                 // BOOLEAN
 733                 // handle the case of unicode.
 734                 // when a unicode appears then we must use a buffer that holds char
 735                 // internal values
 736                 // At the end of this method currentCharacter holds the new visited char
 737                 // and currentPosition points right next after it
 738                 // Both previous lines are true if the currentCharacter is a digit base on
 739                 // radix
 740                 // On false, no side effect has occured.
 741                 // ALL getNextChar.... ARE OPTIMIZED COPIES
 742                 int temp = currentPosition;
 743                 try {
 744                         currentCharacter = source[currentPosition++];
 745                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 746                         // && (source[currentPosition] == 'u')) {
 747                         // //-------------unicode traitement ------------
 748                         // int c1, c2, c3, c4;
 749                         // int unicodeSize = 6;
 750                         // currentPosition++;
 751                         // while (source[currentPosition] == 'u') {
 752                         // currentPosition++;
 753                         // unicodeSize++;
 754                         // }
 755                         //
 756                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 757                         // || c1 < 0)
 758                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 759                         // || c2 < 0)
 760                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 761                         // || c3 < 0)
 762                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 763                         // || c4 < 0)) {
 764                         // currentPosition = temp;
 765                         // return false;
 766                         // }
 767                         //
 768                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 769                         // if (Character.digit(currentCharacter, radix) == -1) {
 770                         // currentPosition = temp;
 771                         // return false;
 772                         // }
 773                         //
 774                         // //need the unicode buffer
 775                         // if (withoutUnicodePtr == 0) {
 776                         // //buffer all the entries that have been left aside....
 777                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 778                         // System.arraycopy(
 779                         // source,
 780                         // startPosition,
 781                         // withoutUnicodeBuffer,
 782                         // 1,
 783                         // withoutUnicodePtr);
 784                         // }
 785                         // //fill the buffer with the char
 786                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 787                         // return true;
 788                         // } //-------------end unicode traitement--------------
 789                         // else {
 790                         if (Character.digit(currentCharacter, radix) == -1) {
 791                                 currentPosition = temp;
 792                                 return false;
 793                         }
 794                         // if (withoutUnicodePtr != 0)
 795                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 796                         return true;
 797                         // }
 798                 } catch (IndexOutOfBoundsException e) {
 799                         currentPosition = temp;
 800                         return false;
 801                 }
 802         }
 803
 804         public boolean getNextCharAsJavaIdentifierPart() {
 805                 // BOOLEAN
 806                 // handle the case of unicode.
 807                 // when a unicode appears then we must use a buffer that holds char
 808                 // internal values
 809                 // At the end of this method currentCharacter holds the new visited char
 810                 // and currentPosition points right next after it
 811                 // Both previous lines are true if the currentCharacter is a
 812                 // JavaIdentifierPart
 813                 // On false, no side effect has occured.
 814                 // ALL getNextChar.... ARE OPTIMIZED COPIES
 815                 int temp = currentPosition;
 816                 try {
 817                         currentCharacter = source[currentPosition++];
 818                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 819                         // && (source[currentPosition] == 'u')) {
 820                         // //-------------unicode traitement ------------
 821                         // int c1, c2, c3, c4;
 822                         // int unicodeSize = 6;
 823                         // currentPosition++;
 824                         // while (source[currentPosition] == 'u') {
 825                         // currentPosition++;
 826                         // unicodeSize++;
 827                         // }
 828                         //
 829                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 830                         // || c1 < 0)
 831                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 832                         // || c2 < 0)
 833                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 834                         // || c3 < 0)
 835                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 836                         // || c4 < 0)) {
 837                         // currentPosition = temp;
 838                         // return false;
 839                         // }
 840                         //
 841                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 842                         // if (!isPHPIdentifierPart(currentCharacter)) {
 843                         // currentPosition = temp;
 844                         // return false;
 845                         // }
 846                         //
 847                         // //need the unicode buffer
 848                         // if (withoutUnicodePtr == 0) {
 849                         // //buffer all the entries that have been left aside....
 850                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 851                         // System.arraycopy(
 852                         // source,
 853                         // startPosition,
 854                         // withoutUnicodeBuffer,
 855                         // 1,
 856                         // withoutUnicodePtr);
 857                         // }
 858                         // //fill the buffer with the char
 859                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 860                         // return true;
 861                         // } //-------------end unicode traitement--------------
 862                         // else {
 863                         if (!isPHPIdentifierPart(currentCharacter)) {
 864                                 currentPosition = temp;
 865                                 return false;
 866                         }
 867                         // if (withoutUnicodePtr != 0)
 868                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 869                         return true;
 870                         // }
 871                 } catch (IndexOutOfBoundsException e) {
 872                         currentPosition = temp;
 873                         return false;
 874                 }
 875         }
 876
 877         public int getCastOrParen() {
 878                 int tempPosition = currentPosition;
 879                 char tempCharacter = currentCharacter;
 880                 int tempToken = TokenNameLPAREN;
 881                 boolean found = false;
 882                 StringBuffer buf = new StringBuffer();
 883                 try {
 884                         do {
 885                                 currentCharacter = source[currentPosition++];
 886                         } while (currentCharacter == ' ' || currentCharacter == '\t');
 887                         while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
 888                                 //      while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
 889                                 buf.append(currentCharacter);
 890                                 currentCharacter = source[currentPosition++];
 891                         }
 892                         if (buf.length() >= 3 && buf.length() <= 7) {
 893                                 char[] data = buf.toString().toCharArray();
 894                                 int index = 0;
 895                                 switch (data.length) {
 896                                 case 3:
 897                                         // int
 898                                         if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
 899                                                 found = true;
 900                                                 tempToken = TokenNameintCAST;
 901                                         }
 902                                         break;
 903                                 case 4:
 904                                         // bool real
 905                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
 906                                                 found = true;
 907                                                 tempToken = TokenNameboolCAST;
 908                                         } else {
 909                                                 index = 0;
 910                                                 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
 911                                                         found = true;
 912                                                         tempToken = TokenNamedoubleCAST;
 913                                                 }
 914                                         }
 915                                         break;
 916                                 case 5:
 917                                         // array unset float
 918                                         if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
 919                                                         && (data[++index] == 'y')) {
 920                                                 found = true;
 921                                                 tempToken = TokenNamearrayCAST;
 922                                         } else {
 923                                                 index = 0;
 924                                                 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
 925                                                                 && (data[++index] == 't')) {
 926                                                         found = true;
 927                                                         tempToken = TokenNameunsetCAST;
 928                                                 } else {
 929                                                         index = 0;
 930                                                         if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
 931                                                                         && (data[++index] == 't')) {
 932                                                                 found = true;
 933                                                                 tempToken = TokenNamedoubleCAST;
 934                                                         }
 935                                                 }
 936                                         }
 937                                         break;
 938                                 case 6:
 939                                         // object string double
 940                                         if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
 941                                                         && (data[++index] == 'c') && (data[++index] == 't')) {
 942                                                 found = true;
 943                                                 tempToken = TokenNameobjectCAST;
 944                                         } else {
 945                                                 index = 0;
 946                                                 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
 947                                                                 && (data[++index] == 'n') && (data[++index] == 'g')) {
 948                                                         found = true;
 949                                                         tempToken = TokenNamestringCAST;
 950                                                 } else {
 951                                                         index = 0;
 952                                                         if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
 953                                                                         && (data[++index] == 'l') && (data[++index] == 'e')) {
 954                                                                 found = true;
 955                                                                 tempToken = TokenNamedoubleCAST;
 956                                                         }
 957                                                 }
 958                                         }
 959                                         break;
 960                                 case 7:
 961                                         // boolean integer
 962                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
 963                                                         && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
 964                                                 found = true;
 965                                                 tempToken = TokenNameboolCAST;
 966                                         } else {
 967                                                 index = 0;
 968                                                 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
 969                                                                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
 970                                                         found = true;
 971                                                         tempToken = TokenNameintCAST;
 972                                                 }
 973                                         }
 974                                         break;
 975                                 }
 976                                 if (found) {
 977                                         while (currentCharacter == ' ' || currentCharacter == '\t') {
 978                                                 currentCharacter = source[currentPosition++];
 979                                         }
 980                                         if (currentCharacter == ')') {
 981                                                 return tempToken;
 982                                         }
 983                                 }
 984                         }
 985                 } catch (IndexOutOfBoundsException e) {
 986                 }
 987                 currentCharacter = tempCharacter;
 988                 currentPosition = tempPosition;
 989                 return TokenNameLPAREN;
 990         }
 991
 992         public void consumeStringInterpolated() throws InvalidInputException {
 993                 try {
 994                         // consume next character
 995                         unicodeAsBackSlash = false;
 996                         currentCharacter = source[currentPosition++];
 997                         // if (((currentCharacter = source[currentPosition++]) == '\\')
 998                         // && (source[currentPosition] == 'u')) {
 999                         // getNextUnicodeChar();
1000                         // } else {
1001                         // if (withoutUnicodePtr != 0) {
1002                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1003                         // currentCharacter;
1004                         // }
1005                         // }
1006                         while (currentCharacter != '`') {
1007                                 /** ** in PHP \r and \n are valid in string literals *** */
1008                                 // if ((currentCharacter == '\n')
1009                                 // || (currentCharacter == '\r')) {
1010                                 // // relocate if finding another quote fairly close: thus unicode
1011                                 // '/u000D' will be fully consumed
1012                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1013                                 // if (currentPosition + lookAhead == source.length)
1014                                 // break;
1015                                 // if (source[currentPosition + lookAhead] == '\n')
1016                                 // break;
1017                                 // if (source[currentPosition + lookAhead] == '\"') {
1018                                 // currentPosition += lookAhead + 1;
1019                                 // break;
1020                                 // }
1021                                 // }
1022                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1023                                 // }
1024                                 if (currentCharacter == '\\') {
1025                                         int escapeSize = currentPosition;
1026                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1027                                         // scanEscapeCharacter make a side effect on this value and we need
1028                                         // the previous value few lines down this one
1029                                         scanDoubleQuotedEscapeCharacter();
1030                                         escapeSize = currentPosition - escapeSize;
1031                                         if (withoutUnicodePtr == 0) {
1032                                                 // buffer all the entries that have been left aside....
1033                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1034                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1035                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1036                                         } else { // overwrite the / in the buffer
1037                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1038                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1039                                                         // where only one is correct
1040                                                         withoutUnicodePtr--;
1041                                                 }
1042                                         }
1043                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1044                                         if (recordLineSeparator) {
1045                                                 pushLineSeparator();
1046                                         }
1047                                 }
1048                                 // consume next character
1049                                 unicodeAsBackSlash = false;
1050                                 currentCharacter = source[currentPosition++];
1051                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1052                                 // && (source[currentPosition] == 'u')) {
1053                                 // getNextUnicodeChar();
1054                                 // } else {
1055                                 if (withoutUnicodePtr != 0) {
1056                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1057                                 }
1058                                 // }
1059                         }
1060                 } catch (IndexOutOfBoundsException e) {
1061                         // reset end position for error reporting
1062                         currentPosition -= 2;
1063                         throw new InvalidInputException(UNTERMINATED_STRING);
1064                 } catch (InvalidInputException e) {
1065                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1066                                 // relocate if finding another quote fairly close: thus unicode
1067                                 // '/u000D' will be fully consumed
1068                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1069                                         if (currentPosition + lookAhead == source.length)
1070                                                 break;
1071                                         if (source[currentPosition + lookAhead] == '\n')
1072                                                 break;
1073                                         if (source[currentPosition + lookAhead] == '`') {
1074                                                 currentPosition += lookAhead + 1;
1075                                                 break;
1076                                         }
1077                                 }
1078                         }
1079                         throw e; // rethrow
1080                 }
1081                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1082                         // //$NON-NLS-?$ where ? is an
1083                         // int.
1084                         if (currentLine == null) {
1085                                 currentLine = new NLSLine();
1086                                 lines.add(currentLine);
1087                         }
1088                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1089                 }
1090         }
1091
1092         public void consumeStringConstant() throws InvalidInputException {
1093                 try {
1094                         // consume next character
1095                         unicodeAsBackSlash = false;
1096                         currentCharacter = source[currentPosition++];
1097                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1098                         // && (source[currentPosition] == 'u')) {
1099                         // getNextUnicodeChar();
1100                         // } else {
1101                         // if (withoutUnicodePtr != 0) {
1102                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1103                         // currentCharacter;
1104                         // }
1105                         // }
1106                         while (currentCharacter != '\'') {
1107                                 /** ** in PHP \r and \n are valid in string literals *** */
1108                                 // if ((currentCharacter == '\n')
1109                                 // || (currentCharacter == '\r')) {
1110                                 // // relocate if finding another quote fairly close: thus unicode
1111                                 // '/u000D' will be fully consumed
1112                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1113                                 // if (currentPosition + lookAhead == source.length)
1114                                 // break;
1115                                 // if (source[currentPosition + lookAhead] == '\n')
1116                                 // break;
1117                                 // if (source[currentPosition + lookAhead] == '\"') {
1118                                 // currentPosition += lookAhead + 1;
1119                                 // break;
1120                                 // }
1121                                 // }
1122                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1123                                 // }
1124                                 if (currentCharacter == '\\') {
1125                                         int escapeSize = currentPosition;
1126                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1127                                         // scanEscapeCharacter make a side effect on this value and we need
1128                                         // the previous value few lines down this one
1129                                         scanSingleQuotedEscapeCharacter();
1130                                         escapeSize = currentPosition - escapeSize;
1131                                         if (withoutUnicodePtr == 0) {
1132                                                 // buffer all the entries that have been left aside....
1133                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1134                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1135                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1136                                         } else { // overwrite the / in the buffer
1137                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1138                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1139                                                         // where only one is correct
1140                                                         withoutUnicodePtr--;
1141                                                 }
1142                                         }
1143                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1144                                         if (recordLineSeparator) {
1145                                                 pushLineSeparator();
1146                                         }
1147                                 }
1148                                 // consume next character
1149                                 unicodeAsBackSlash = false;
1150                                 currentCharacter = source[currentPosition++];
1151                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1152                                 // && (source[currentPosition] == 'u')) {
1153                                 // getNextUnicodeChar();
1154                                 // } else {
1155                                 if (withoutUnicodePtr != 0) {
1156                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1157                                 }
1158                                 // }
1159                         }
1160                 } catch (IndexOutOfBoundsException e) {
1161                         // reset end position for error reporting
1162                         currentPosition -= 2;
1163                         throw new InvalidInputException(UNTERMINATED_STRING);
1164                 } catch (InvalidInputException e) {
1165                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1166                                 // relocate if finding another quote fairly close: thus unicode
1167                                 // '/u000D' will be fully consumed
1168                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1169                                         if (currentPosition + lookAhead == source.length)
1170                                                 break;
1171                                         if (source[currentPosition + lookAhead] == '\n')
1172                                                 break;
1173                                         if (source[currentPosition + lookAhead] == '\'') {
1174                                                 currentPosition += lookAhead + 1;
1175                                                 break;
1176                                         }
1177                                 }
1178                         }
1179                         throw e; // rethrow
1180                 }
1181                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1182                         // //$NON-NLS-?$ where ? is an
1183                         // int.
1184                         if (currentLine == null) {
1185                                 currentLine = new NLSLine();
1186                                 lines.add(currentLine);
1187                         }
1188                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1189                 }
1190         }
1191
1192         public void consumeStringLiteral() throws InvalidInputException {
1193                 try {
1194                         boolean openDollarBrace = false;
1195                         // consume next character
1196                         unicodeAsBackSlash = false;
1197                         currentCharacter = source[currentPosition++];
1198                         while (currentCharacter != '"' || openDollarBrace) {
1199                                 /** ** in PHP \r and \n are valid in string literals *** */
1200                                 if (currentCharacter == '\\') {
1201                                         int escapeSize = currentPosition;
1202                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1203                                         // scanEscapeCharacter make a side effect on this value and we need
1204                                         // the previous value few lines down this one
1205                                         scanDoubleQuotedEscapeCharacter();
1206                                         escapeSize = currentPosition - escapeSize;
1207                                         if (withoutUnicodePtr == 0) {
1208                                                 // buffer all the entries that have been left aside....
1209                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1210                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1211                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1212                                         } else { // overwrite the / in the buffer
1213                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1214                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1215                                                         // where only one is correct
1216                                                         withoutUnicodePtr--;
1217                                                 }
1218                                         }
1219                                 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1220                                         openDollarBrace = true;
1221                                 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1222                                         openDollarBrace = true;
1223                                 } else if (currentCharacter == '}') {
1224                                         openDollarBrace = false;
1225                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1226                                         if (recordLineSeparator) {
1227                                                 pushLineSeparator();
1228                                         }
1229                                 }
1230                                 // consume next character
1231                                 unicodeAsBackSlash = false;
1232                                 currentCharacter = source[currentPosition++];
1233                                 if (withoutUnicodePtr != 0) {
1234                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1235                                 }
1236                         }
1237                 } catch (IndexOutOfBoundsException e) {
1238                         // reset end position for error reporting
1239                         currentPosition -= 2;
1240                         throw new InvalidInputException(UNTERMINATED_STRING);
1241                 } catch (InvalidInputException e) {
1242                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1243                                 // relocate if finding another quote fairly close: thus unicode
1244                                 // '/u000D' will be fully consumed
1245                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1246                                         if (currentPosition + lookAhead == source.length)
1247                                                 break;
1248                                         if (source[currentPosition + lookAhead] == '\n')
1249                                                 break;
1250                                         if (source[currentPosition + lookAhead] == '\"') {
1251                                                 currentPosition += lookAhead + 1;
1252                                                 break;
1253                                         }
1254                                 }
1255                         }
1256                         throw e; // rethrow
1257                 }
1258                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1259                         // //$NON-NLS-?$ where ? is an
1260                         // int.
1261                         if (currentLine == null) {
1262                                 currentLine = new NLSLine();
1263                                 lines.add(currentLine);
1264                         }
1265                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1266                 }
1267         }
1268
1269         public int getNextToken() throws InvalidInputException {
1270                 phpExpressionTag = false;
1271                 if (!phpMode) {
1272                         return getInlinedHTMLToken(currentPosition);
1273                 }
1274                 if (phpMode) {
1275                         this.wasAcr = false;
1276                         if (diet) {
1277                                 jumpOverMethodBody();
1278                                 diet = false;
1279                                 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1280                         }
1281                         try {
1282                                 while (true) {
1283                                         withoutUnicodePtr = 0;
1284                                         // start with a new token
1285                                         char encapsedChar = ' ';
1286                                         // if (!encapsedStringStack.isEmpty()) {
1287                                         // encapsedChar = ((Character)
1288                                         // encapsedStringStack.peek()).charValue();
1289                                         // }
1290                                         // if (encapsedChar != '$' && encapsedChar != ' ') {
1291                                         // currentCharacter = source[currentPosition++];
1292                                         // if (currentCharacter == encapsedChar) {
1293                                         // switch (currentCharacter) {
1294                                         // case '`':
1295                                         // return TokenNameEncapsedString0;
1296                                         // case '\'':
1297                                         // return TokenNameEncapsedString1;
1298                                         // case '"':
1299                                         // return TokenNameEncapsedString2;
1300                                         // }
1301                                         // }
1302                                         // while (currentCharacter != encapsedChar) {
1303                                         // /** ** in PHP \r and \n are valid in string literals *** */
1304                                         // switch (currentCharacter) {
1305                                         // case '\\':
1306                                         // int escapeSize = currentPosition;
1307                                         // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1308                                         // //scanEscapeCharacter make a side effect on this value and
1309                                         // // we need the previous value few lines down this one
1310                                         // scanDoubleQuotedEscapeCharacter();
1311                                         // escapeSize = currentPosition - escapeSize;
1312                                         // if (withoutUnicodePtr == 0) {
1313                                         // //buffer all the entries that have been left aside....
1314                                         // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1315                                         // startPosition;
1316                                         // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1317                                         // withoutUnicodePtr);
1318                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1319                                         // } else { //overwrite the / in the buffer
1320                                         // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1321                                         // if (backSlashAsUnicodeInString) { //there are TWO \ in
1322                                         // withoutUnicodePtr--;
1323                                         // }
1324                                         // }
1325                                         // break;
1326                                         // case '\r':
1327                                         // case '\n':
1328                                         // if (recordLineSeparator) {
1329                                         // pushLineSeparator();
1330                                         // }
1331                                         // break;
1332                                         // case '$':
1333                                         // if (isPHPIdentifierStart(source[currentPosition]) ||
1334                                         // source[currentPosition] == '{') {
1335                                         // currentPosition--;
1336                                         // encapsedStringStack.push(new Character('$'));
1337                                         // return TokenNameSTRING;
1338                                         // }
1339                                         // break;
1340                                         // case '{':
1341                                         // if (source[currentPosition] == '$') { // CURLY_OPEN
1342                                         // currentPosition--;
1343                                         // encapsedStringStack.push(new Character('$'));
1344                                         // return TokenNameSTRING;
1345                                         // }
1346                                         // }
1347                                         // // consume next character
1348                                         // unicodeAsBackSlash = false;
1349                                         // currentCharacter = source[currentPosition++];
1350                                         // if (withoutUnicodePtr != 0) {
1351                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1352                                         // }
1353                                         // // }
1354                                         // } // end while
1355                                         // currentPosition--;
1356                                         // return TokenNameSTRING;
1357                                         // }
1358                                         // ---------Consume white space and handles startPosition---------
1359                                         int whiteStart = currentPosition;
1360                                         startPosition = currentPosition;
1361                                         currentCharacter = source[currentPosition++];
1362                                         // if (encapsedChar == '$') {
1363                                         // switch (currentCharacter) {
1364                                         // case '\\':
1365                                         // currentCharacter = source[currentPosition++];
1366                                         // return TokenNameSTRING;
1367                                         // case '{':
1368                                         // if (encapsedChar == '$') {
1369                                         // if (getNextChar('$'))
1370                                         // return TokenNameLBRACE_DOLLAR;
1371                                         // }
1372                                         // return TokenNameLBRACE;
1373                                         // case '}':
1374                                         // return TokenNameRBRACE;
1375                                         // case '[':
1376                                         // return TokenNameLBRACKET;
1377                                         // case ']':
1378                                         // return TokenNameRBRACKET;
1379                                         // case '\'':
1380                                         // if (tokenizeStrings) {
1381                                         // consumeStringConstant();
1382                                         // return TokenNameStringSingleQuote;
1383                                         // }
1384                                         // return TokenNameEncapsedString1;
1385                                         // case '"':
1386                                         // return TokenNameEncapsedString2;
1387                                         // case '`':
1388                                         // if (tokenizeStrings) {
1389                                         // consumeStringInterpolated();
1390                                         // return TokenNameStringInterpolated;
1391                                         // }
1392                                         // return TokenNameEncapsedString0;
1393                                         // case '-':
1394                                         // if (getNextChar('>'))
1395                                         // return TokenNameMINUS_GREATER;
1396                                         // return TokenNameSTRING;
1397                                         // default:
1398                                         // if (currentCharacter == '$') {
1399                                         // int oldPosition = currentPosition;
1400                                         // try {
1401                                         // currentCharacter = source[currentPosition++];
1402                                         // if (currentCharacter == '{') {
1403                                         // return TokenNameDOLLAR_LBRACE;
1404                                         // }
1405                                         // if (isPHPIdentifierStart(currentCharacter)) {
1406                                         // return scanIdentifierOrKeyword(true);
1407                                         // } else {
1408                                         // currentPosition = oldPosition;
1409                                         // return TokenNameSTRING;
1410                                         // }
1411                                         // } catch (IndexOutOfBoundsException e) {
1412                                         // currentPosition = oldPosition;
1413                                         // return TokenNameSTRING;
1414                                         // }
1415                                         // }
1416                                         // if (isPHPIdentifierStart(currentCharacter))
1417                                         // return scanIdentifierOrKeyword(false);
1418                                         // if (Character.isDigit(currentCharacter))
1419                                         // return scanNumber(false);
1420                                         // return TokenNameERROR;
1421                                         // }
1422                                         // }
1423                                         // boolean isWhiteSpace;
1424
1425                                         while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1426                                                 startPosition = currentPosition;
1427                                                 currentCharacter = source[currentPosition++];
1428                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1429                                                 // && (source[currentPosition] == 'u')) {
1430                                                 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1431                                                 // } else {
1432                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1433                                                         checkNonExternalizeString();
1434                                                         if (recordLineSeparator) {
1435                                                                 pushLineSeparator();
1436                                                         } else {
1437                                                                 currentLine = null;
1438                                                         }
1439                                                 }
1440                                                 // isWhiteSpace = (currentCharacter == ' ')
1441                                                 // || Character.isWhitespace(currentCharacter);
1442                                                 // }
1443                                         }
1444                                         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1445                                                 // reposition scanner in case we are interested by spaces as tokens
1446                                                 currentPosition--;
1447                                                 startPosition = whiteStart;
1448                                                 return TokenNameWHITESPACE;
1449                                         }
1450                                         // little trick to get out in the middle of a source computation
1451                                         if (currentPosition > eofPosition)
1452                                                 return TokenNameEOF;
1453                                         // ---------Identify the next token-------------
1454                                         switch (currentCharacter) {
1455                                         case '(':
1456                                                 return getCastOrParen();
1457                                         case ')':
1458                                                 return TokenNameRPAREN;
1459                                         case '{':
1460                                                 return TokenNameLBRACE;
1461                                         case '}':
1462                                                 return TokenNameRBRACE;
1463                                         case '[':
1464                                                 return TokenNameLBRACKET;
1465                                         case ']':
1466                                                 return TokenNameRBRACKET;
1467                                         case ';':
1468                                                 return TokenNameSEMICOLON;
1469                                         case ',':
1470                                                 return TokenNameCOMMA;
1471                                         case '.':
1472                                                 if (getNextChar('='))
1473                                                         return TokenNameDOT_EQUAL;
1474                                                 if (getNextCharAsDigit())
1475                                                         return scanNumber(true);
1476                                                 return TokenNameDOT;
1477                                         case '+': {
1478                                                 int test;
1479                                                 if ((test = getNextChar('+', '=')) == 0)
1480                                                         return TokenNamePLUS_PLUS;
1481                                                 if (test > 0)
1482                                                         return TokenNamePLUS_EQUAL;
1483                                                 return TokenNamePLUS;
1484                                         }
1485                                         case '-': {
1486                                                 int test;
1487                                                 if ((test = getNextChar('-', '=')) == 0)
1488                                                         return TokenNameMINUS_MINUS;
1489                                                 if (test > 0)
1490                                                         return TokenNameMINUS_EQUAL;
1491                                                 if (getNextChar('>'))
1492                                                         return TokenNameMINUS_GREATER;
1493                                                 return TokenNameMINUS;
1494                                         }
1495                                         case '~':
1496                                                 if (getNextChar('='))
1497                                                         return TokenNameTWIDDLE_EQUAL;
1498                                                 return TokenNameTWIDDLE;
1499                                         case '!':
1500                                                 if (getNextChar('=')) {
1501                                                         if (getNextChar('=')) {
1502                                                                 return TokenNameNOT_EQUAL_EQUAL;
1503                                                         }
1504                                                         return TokenNameNOT_EQUAL;
1505                                                 }
1506                                                 return TokenNameNOT;
1507                                         case '*':
1508                                                 if (getNextChar('='))
1509                                                         return TokenNameMULTIPLY_EQUAL;
1510                                                 return TokenNameMULTIPLY;
1511                                         case '%':
1512                                                 if (getNextChar('='))
1513                                                         return TokenNameREMAINDER_EQUAL;
1514                                                 return TokenNameREMAINDER;
1515                                         case '<': {
1516                                                 int oldPosition = currentPosition;
1517                                                 try {
1518                                                         currentCharacter = source[currentPosition++];
1519                                                 } catch (IndexOutOfBoundsException e) {
1520                                                         currentPosition = oldPosition;
1521                                                         return TokenNameLESS;
1522                                                 }
1523                                                 switch (currentCharacter) {
1524                                                 case '=':
1525                                                         return TokenNameLESS_EQUAL;
1526                                                 case '>':
1527                                                         return TokenNameNOT_EQUAL;
1528                                                 case '<':
1529                                                         if (getNextChar('='))
1530                                                                 return TokenNameLEFT_SHIFT_EQUAL;
1531                                                         if (getNextChar('<')) {
1532                                                                 currentCharacter = source[currentPosition++];
1533                                                                 while (Character.isWhitespace(currentCharacter)) {
1534                                                                         currentCharacter = source[currentPosition++];
1535                                                                 }
1536                                                                 int heredocStart = currentPosition - 1;
1537                                                                 int heredocLength = 0;
1538                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1539                                                                         currentCharacter = source[currentPosition++];
1540                                                                 } else {
1541                                                                         return TokenNameERROR;
1542                                                                 }
1543                                                                 while (isPHPIdentifierPart(currentCharacter)) {
1544                                                                         currentCharacter = source[currentPosition++];
1545                                                                 }
1546                                                                 heredocLength = currentPosition - heredocStart - 1;
1547                                                                 // heredoc end-tag determination
1548                                                                 boolean endTag = true;
1549                                                                 char ch;
1550                                                                 do {
1551                                                                         ch = source[currentPosition++];
1552                                                                         if (ch == '\r' || ch == '\n') {
1553                                                                                 if (recordLineSeparator) {
1554                                                                                         pushLineSeparator();
1555                                                                                 } else {
1556                                                                                         currentLine = null;
1557                                                                                 }
1558                                                                                 for (int i = 0; i < heredocLength; i++) {
1559                                                                                         if (source[currentPosition + i] != source[heredocStart + i]) {
1560                                                                                                 endTag = false;
1561                                                                                                 break;
1562                                                                                         }
1563                                                                                 }
1564                                                                                 if (endTag) {
1565                                                                                         currentPosition += heredocLength - 1;
1566                                                                                         currentCharacter = source[currentPosition++];
1567                                                                                         break; // do...while loop
1568                                                                                 } else {
1569                                                                                         endTag = true;
1570                                                                                 }
1571                                                                         }
1572                                                                 } while (true);
1573                                                                 return TokenNameHEREDOC;
1574                                                         }
1575                                                         return TokenNameLEFT_SHIFT;
1576                                                 }
1577                                                 currentPosition = oldPosition;
1578                                                 return TokenNameLESS;
1579                                         }
1580                                         case '>': {
1581                                                 int test;
1582                                                 if ((test = getNextChar('=', '>')) == 0)
1583                                                         return TokenNameGREATER_EQUAL;
1584                                                 if (test > 0) {
1585                                                         if ((test = getNextChar('=', '>')) == 0)
1586                                                                 return TokenNameRIGHT_SHIFT_EQUAL;
1587                                                         return TokenNameRIGHT_SHIFT;
1588                                                 }
1589                                                 return TokenNameGREATER;
1590                                         }
1591                                         case '=':
1592                                                 if (getNextChar('=')) {
1593                                                         if (getNextChar('=')) {
1594                                                                 return TokenNameEQUAL_EQUAL_EQUAL;
1595                                                         }
1596                                                         return TokenNameEQUAL_EQUAL;
1597                                                 }
1598                                                 if (getNextChar('>'))
1599                                                         return TokenNameEQUAL_GREATER;
1600                                                 return TokenNameEQUAL;
1601                                         case '&': {
1602                                                 int test;
1603                                                 if ((test = getNextChar('&', '=')) == 0)
1604                                                         return TokenNameAND_AND;
1605                                                 if (test > 0)
1606                                                         return TokenNameAND_EQUAL;
1607                                                 return TokenNameAND;
1608                                         }
1609                                         case '|': {
1610                                                 int test;
1611                                                 if ((test = getNextChar('|', '=')) == 0)
1612                                                         return TokenNameOR_OR;
1613                                                 if (test > 0)
1614                                                         return TokenNameOR_EQUAL;
1615                                                 return TokenNameOR;
1616                                         }
1617                                         case '^':
1618                                                 if (getNextChar('='))
1619                                                         return TokenNameXOR_EQUAL;
1620                                                 return TokenNameXOR;
1621                                         case '?':
1622                                                 if (getNextChar('>')) {
1623                                                         phpMode = false;
1624                                                         if (currentPosition == source.length) {
1625                                                                 phpMode = true;
1626                                                                 return TokenNameINLINE_HTML;
1627                                                         }
1628                                                         return getInlinedHTMLToken(currentPosition - 2);
1629                                                 }
1630                                                 return TokenNameQUESTION;
1631                                         case ':':
1632                                                 if (getNextChar(':'))
1633                                                         return TokenNamePAAMAYIM_NEKUDOTAYIM;
1634                                                 return TokenNameCOLON;
1635                                         case '@':
1636                                                 return TokenNameAT;
1637                                         case '\'':
1638                                                 consumeStringConstant();
1639                                                 return TokenNameStringSingleQuote;
1640                                         case '"':
1641                                                 // if (tokenizeStrings) {
1642                                                 consumeStringLiteral();
1643                                                 return TokenNameStringDoubleQuote;
1644                                         // }
1645                                         // return TokenNameEncapsedString2;
1646                                         case '`':
1647                                                 // if (tokenizeStrings) {
1648                                                 consumeStringInterpolated();
1649                                                 return TokenNameStringInterpolated;
1650                                         // }
1651                                         // return TokenNameEncapsedString0;
1652                                         case '#':
1653                                         case '/': {
1654                                                 char startChar = currentCharacter;
1655                                                 if (getNextChar('=') && startChar == '/') {
1656                                                         return TokenNameDIVIDE_EQUAL;
1657                                                 }
1658                                                 int test;
1659                                                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1660                                                         // line comment
1661                                                         this.lastCommentLinePosition = this.currentPosition;
1662                                                         int endPositionForLineComment = 0;
1663                                                         try { // get the next char
1664                                                                 currentCharacter = source[currentPosition++];
1665                                                                 // if (((currentCharacter = source[currentPosition++])
1666                                                                 // == '\\')
1667                                                                 // && (source[currentPosition] == 'u')) {
1668                                                                 // //-------------unicode traitement ------------
1669                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1670                                                                 // currentPosition++;
1671                                                                 // while (source[currentPosition] == 'u') {
1672                                                                 // currentPosition++;
1673                                                                 // }
1674                                                                 // if ((c1 =
1675                                                                 // Character.getNumericValue(source[currentPosition++]))
1676                                                                 // > 15
1677                                                                 // || c1 < 0
1678                                                                 // || (c2 =
1679                                                                 // Character.getNumericValue(source[currentPosition++]))
1680                                                                 // > 15
1681                                                                 // || c2 < 0
1682                                                                 // || (c3 =
1683                                                                 // Character.getNumericValue(source[currentPosition++]))
1684                                                                 // > 15
1685                                                                 // || c3 < 0
1686                                                                 // || (c4 =
1687                                                                 // Character.getNumericValue(source[currentPosition++]))
1688                                                                 // > 15
1689                                                                 // || c4 < 0) {
1690                                                                 // throw new
1691                                                                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1692                                                                 // } else {
1693                                                                 // currentCharacter =
1694                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1695                                                                 // }
1696                                                                 // }
1697                                                                 // handle the \\u case manually into comment
1698                                                                 // if (currentCharacter == '\\') {
1699                                                                 // if (source[currentPosition] == '\\')
1700                                                                 // currentPosition++;
1701                                                                 // } //jump over the \\
1702                                                                 boolean isUnicode = false;
1703                                                                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1704                                                                         this.lastCommentLinePosition = this.currentPosition;
1705                                                                         if (currentCharacter == '?') {
1706                                                                                 if (getNextChar('>')) {
1707                                                                                         // ?> breaks line comments
1708                                                                                         startPosition = currentPosition - 2;
1709                                                                                         phpMode = false;
1710                                                                                         return TokenNameINLINE_HTML;
1711                                                                                 }
1712                                                                         }
1713                                                                         // get the next char
1714                                                                         isUnicode = false;
1715                                                                         currentCharacter = source[currentPosition++];
1716                                                                         // if (((currentCharacter = source[currentPosition++])
1717                                                                         // == '\\')
1718                                                                         // && (source[currentPosition] == 'u')) {
1719                                                                         // isUnicode = true;
1720                                                                         // //-------------unicode traitement ------------
1721                                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1722                                                                         // currentPosition++;
1723                                                                         // while (source[currentPosition] == 'u') {
1724                                                                         // currentPosition++;
1725                                                                         // }
1726                                                                         // if ((c1 =
1727                                                                         // Character.getNumericValue(source[currentPosition++]))
1728                                                                         // > 15
1729                                                                         // || c1 < 0
1730                                                                         // || (c2 =
1731                                                                         // Character.getNumericValue(
1732                                                                         // source[currentPosition++]))
1733                                                                         // > 15
1734                                                                         // || c2 < 0
1735                                                                         // || (c3 =
1736                                                                         // Character.getNumericValue(
1737                                                                         // source[currentPosition++]))
1738                                                                         // > 15
1739                                                                         // || c3 < 0
1740                                                                         // || (c4 =
1741                                                                         // Character.getNumericValue(
1742                                                                         // source[currentPosition++]))
1743                                                                         // > 15
1744                                                                         // || c4 < 0) {
1745                                                                         // throw new
1746                                                                         // InvalidInputException(INVALID_UNICODE_ESCAPE);
1747                                                                         // } else {
1748                                                                         // currentCharacter =
1749                                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1750                                                                         // }
1751                                                                         // }
1752                                                                         // handle the \\u case manually into comment
1753                                                                         // if (currentCharacter == '\\') {
1754                                                                         // if (source[currentPosition] == '\\')
1755                                                                         // currentPosition++;
1756                                                                         // } //jump over the \\
1757                                                                 }
1758                                                                 if (isUnicode) {
1759                                                                         endPositionForLineComment = currentPosition - 6;
1760                                                                 } else {
1761                                                                         endPositionForLineComment = currentPosition - 1;
1762                                                                 }
1763                                                                 // recordComment(false);
1764                                                                 recordComment(TokenNameCOMMENT_LINE);
1765                                                                 if (this.taskTags != null)
1766                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1767                                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1768                                                                         checkNonExternalizeString();
1769                                                                         if (recordLineSeparator) {
1770                                                                                 if (isUnicode) {
1771                                                                                         pushUnicodeLineSeparator();
1772                                                                                 } else {
1773                                                                                         pushLineSeparator();
1774                                                                                 }
1775                                                                         } else {
1776                                                                                 currentLine = null;
1777                                                                         }
1778                                                                 }
1779                                                                 if (tokenizeComments) {
1780                                                                         if (!isUnicode) {
1781                                                                                 currentPosition = endPositionForLineComment;
1782                                                                                 // reset one character behind
1783                                                                         }
1784                                                                         return TokenNameCOMMENT_LINE;
1785                                                                 }
1786                                                         } catch (IndexOutOfBoundsException e) { // an eof will then
1787                                                                 // be generated
1788                                                                 if (tokenizeComments) {
1789                                                                         currentPosition--;
1790                                                                         // reset one character behind
1791                                                                         return TokenNameCOMMENT_LINE;
1792                                                                 }
1793                                                         }
1794                                                         break;
1795                                                 }
1796                                                 if (test > 0) {
1797                                                         // traditional and annotation comment
1798                                                         boolean isJavadoc = false, star = false;
1799                                                         // consume next character
1800                                                         unicodeAsBackSlash = false;
1801                                                         currentCharacter = source[currentPosition++];
1802                                                         // if (((currentCharacter = source[currentPosition++]) ==
1803                                                         // '\\')
1804                                                         // && (source[currentPosition] == 'u')) {
1805                                                         // getNextUnicodeChar();
1806                                                         // } else {
1807                                                         // if (withoutUnicodePtr != 0) {
1808                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1809                                                         // currentCharacter;
1810                                                         // }
1811                                                         // }
1812                                                         if (currentCharacter == '*') {
1813                                                                 isJavadoc = true;
1814                                                                 star = true;
1815                                                         }
1816                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1817                                                                 checkNonExternalizeString();
1818                                                                 if (recordLineSeparator) {
1819                                                                         pushLineSeparator();
1820                                                                 } else {
1821                                                                         currentLine = null;
1822                                                                 }
1823                                                         }
1824                                                         try { // get the next char
1825                                                                 currentCharacter = source[currentPosition++];
1826                                                                 // if (((currentCharacter = source[currentPosition++])
1827                                                                 // == '\\')
1828                                                                 // && (source[currentPosition] == 'u')) {
1829                                                                 // //-------------unicode traitement ------------
1830                                                                 // getNextUnicodeChar();
1831                                                                 // }
1832                                                                 // handle the \\u case manually into comment
1833                                                                 // if (currentCharacter == '\\') {
1834                                                                 // if (source[currentPosition] == '\\')
1835                                                                 // currentPosition++;
1836                                                                 // //jump over the \\
1837                                                                 // }
1838                                                                 // empty comment is not a javadoc /**/
1839                                                                 if (currentCharacter == '/') {
1840                                                                         isJavadoc = false;
1841                                                                 }
1842                                                                 // loop until end of comment */
1843                                                                 while ((currentCharacter != '/') || (!star)) {
1844                                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1845                                                                                 checkNonExternalizeString();
1846                                                                                 if (recordLineSeparator) {
1847                                                                                         pushLineSeparator();
1848                                                                                 } else {
1849                                                                                         currentLine = null;
1850                                                                                 }
1851                                                                         }
1852                                                                         star = currentCharacter == '*';
1853                                                                         // get next char
1854                                                                         currentCharacter = source[currentPosition++];
1855                                                                         // if (((currentCharacter = source[currentPosition++])
1856                                                                         // == '\\')
1857                                                                         // && (source[currentPosition] == 'u')) {
1858                                                                         // //-------------unicode traitement ------------
1859                                                                         // getNextUnicodeChar();
1860                                                                         // }
1861                                                                         // handle the \\u case manually into comment
1862                                                                         // if (currentCharacter == '\\') {
1863                                                                         // if (source[currentPosition] == '\\')
1864                                                                         // currentPosition++;
1865                                                                         // } //jump over the \\
1866                                                                 }
1867                                                                 // recordComment(isJavadoc);
1868                                                                 if (isJavadoc) {
1869                                                                         recordComment(TokenNameCOMMENT_PHPDOC);
1870                                                                 } else {
1871                                                                         recordComment(TokenNameCOMMENT_BLOCK);
1872                                                                 }
1873
1874                                                                 if (tokenizeComments) {
1875                                                                         if (isJavadoc)
1876                                                                                 return TokenNameCOMMENT_PHPDOC;
1877                                                                         return TokenNameCOMMENT_BLOCK;
1878                                                                 }
1879
1880                                                                 if (this.taskTags != null) {
1881                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1882                                                                 }
1883                                                         } catch (IndexOutOfBoundsException e) {
1884                                                                 // reset end position for error reporting
1885                                                                 currentPosition -= 2;
1886                                                                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1887                                                         }
1888                                                         break;
1889                                                 }
1890                                                 return TokenNameDIVIDE;
1891                                         }
1892                                         case '\u001a':
1893                                                 if (atEnd())
1894                                                         return TokenNameEOF;
1895                                                 // the atEnd may not be <currentPosition == source.length> if
1896                                                 // source is only some part of a real (external) stream
1897                                                 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1898                                         default:
1899                                                 if (currentCharacter == '$') {
1900                                                         int oldPosition = currentPosition;
1901                                                         try {
1902                                                                 currentCharacter = source[currentPosition++];
1903                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1904                                                                         return scanIdentifierOrKeyword(true);
1905                                                                 } else {
1906                                                                         currentPosition = oldPosition;
1907                                                                         return TokenNameDOLLAR;
1908                                                                 }
1909                                                         } catch (IndexOutOfBoundsException e) {
1910                                                                 currentPosition = oldPosition;
1911                                                                 return TokenNameDOLLAR;
1912                                                         }
1913                                                 }
1914                                                 if (isPHPIdentifierStart(currentCharacter))
1915                                                         return scanIdentifierOrKeyword(false);
1916                                                 if (Character.isDigit(currentCharacter))
1917                                                         return scanNumber(false);
1918                                                 return TokenNameERROR;
1919                                         }
1920                                 }
1921                         } // -----------------end switch while try--------------------
1922                         catch (IndexOutOfBoundsException e) {
1923                         }
1924                 }
1925                 return TokenNameEOF;
1926         }
1927
1928         /**
1929          * @return
1930          * @throws InvalidInputException
1931          */
1932         private int getInlinedHTMLToken(int start) throws InvalidInputException {
1933                 if (currentPosition > source.length) {
1934                         currentPosition = source.length;
1935                         return TokenNameEOF;
1936                 }
1937                 startPosition = start;
1938                 try {
1939                         while (!phpMode) {
1940                                 currentCharacter = source[currentPosition++];
1941                                 if (currentCharacter == '<') {
1942                                         if (getNextChar('?')) {
1943                                                 currentCharacter = source[currentPosition++];
1944                                                 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1945                                                         if (currentCharacter != '=') { // <?=
1946                                                                 currentPosition--;
1947                                                         } else {
1948                                                                 phpExpressionTag = true;
1949                                                         }
1950                                                         // <?
1951                                                         if (ignorePHPOneLiner) { // for CodeFormatter
1952                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1953                                                                         phpMode = true;
1954                                                                         return TokenNameINLINE_HTML;
1955                                                                 }
1956                                                         } else {
1957                                                                 phpMode = true;
1958                                                                 return TokenNameINLINE_HTML;
1959                                                         }
1960                                                 } else {
1961                                                         // boolean phpStart = (currentCharacter == 'P') ||
1962                                                         // (currentCharacter == 'p');
1963                                                         // if (phpStart) {
1964                                                         int test = getNextChar('H', 'h');
1965                                                         if (test >= 0) {
1966                                                                 test = getNextChar('P', 'p');
1967                                                                 if (test >= 0) {
1968                                                                         // <?PHP <?php
1969                                                                         if (ignorePHPOneLiner) {
1970                                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1971                                                                                         phpMode = true;
1972                                                                                         return TokenNameINLINE_HTML;
1973                                                                                 }
1974                                                                         } else {
1975                                                                                 phpMode = true;
1976                                                                                 return TokenNameINLINE_HTML;
1977                                                                         }
1978                                                                 }
1979                                                         }
1980                                                         // }
1981                                                 }
1982                                         }
1983                                 }
1984                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1985                                         if (recordLineSeparator) {
1986                                                 pushLineSeparator();
1987                                         } else {
1988                                                 currentLine = null;
1989                                         }
1990                                 }
1991                         } // -----------------while--------------------
1992                         phpMode = true;
1993                         return TokenNameINLINE_HTML;
1994                 } // -----------------try--------------------
1995                 catch (IndexOutOfBoundsException e) {
1996                         startPosition = start;
1997                         currentPosition--;
1998                 }
1999                 phpMode = true;
2000                 return TokenNameINLINE_HTML;
2001         }
2002
2003         /**
2004          * @return
2005          */
2006         private int lookAheadLinePHPTag() {
2007                 // check if the PHP is only in this line (for CodeFormatter)
2008                 int currentPositionInLine = currentPosition;
2009                 char previousCharInLine = ' ';
2010                 char currentCharInLine = ' ';
2011                 boolean singleQuotedStringActive = false;
2012                 boolean doubleQuotedStringActive = false;
2013
2014                 try {
2015                         // look ahead in this line
2016                         while (true) {
2017                                 previousCharInLine = currentCharInLine;
2018                                 currentCharInLine = source[currentPositionInLine++];
2019                                 switch (currentCharInLine) {
2020                                 case '>':
2021                                         if (previousCharInLine == '?') {
2022                                                 // update the scanner's current Position in the source
2023                                                 currentPosition = currentPositionInLine;
2024                                                 // use as "dummy" token
2025                                                 return TokenNameEOF;
2026                                         }
2027                                         break;
2028                                 case '\\':
2029                                         if (doubleQuotedStringActive) {
2030                                                 // ignore escaped characters in double quoted strings
2031                                                 previousCharInLine = currentCharInLine;
2032                                                 currentCharInLine = source[currentPositionInLine++];
2033                                         }
2034                                 case '\"':
2035                                         if (doubleQuotedStringActive) {
2036                                                 doubleQuotedStringActive = false;
2037                                         } else {
2038                                                 if (!singleQuotedStringActive) {
2039                                                         doubleQuotedStringActive = true;
2040                                                 }
2041                                         }
2042                                         break;
2043                                 case '\'':
2044                                         if (singleQuotedStringActive) {
2045                                                 if (previousCharInLine != '\\') {
2046                                                         singleQuotedStringActive = false;
2047                                                 }
2048                                         } else {
2049                                                 if (!doubleQuotedStringActive) {
2050                                                         singleQuotedStringActive = true;
2051                                                 }
2052                                         }
2053                                         break;
2054                                 case '\n':
2055                                         phpMode = true;
2056                                         return TokenNameINLINE_HTML;
2057                                 case '#':
2058                                         if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2059                                                 phpMode = true;
2060                                                 return TokenNameINLINE_HTML;
2061                                         }
2062                                         break;
2063                                 case '/':
2064                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2065                                                 phpMode = true;
2066                                                 return TokenNameINLINE_HTML;
2067                                         }
2068                                         break;
2069                                 case '*':
2070                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2071                                                 phpMode = true;
2072                                                 return TokenNameINLINE_HTML;
2073                                         }
2074                                         break;
2075                                 }
2076                         }
2077                 } catch (IndexOutOfBoundsException e) {
2078                         phpMode = true;
2079                         currentPosition = currentPositionInLine;
2080                         return TokenNameINLINE_HTML;
2081                 }
2082         }
2083
2084         // public final void getNextUnicodeChar()
2085         // throws IndexOutOfBoundsException, InvalidInputException {
2086         // //VOID
2087         // //handle the case of unicode.
2088         // //when a unicode appears then we must use a buffer that holds char
2089         // internal values
2090         // //At the end of this method currentCharacter holds the new visited char
2091         // //and currentPosition points right next after it
2092         //
2093         // //ALL getNextChar.... ARE OPTIMIZED COPIES
2094         //
2095         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2096         // currentPosition++;
2097         // while (source[currentPosition] == 'u') {
2098         // currentPosition++;
2099         // unicodeSize++;
2100         // }
2101         //
2102         // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2103         // || c1 < 0
2104         // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2105         // || c2 < 0
2106         // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2107         // || c3 < 0
2108         // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2109         // || c4 < 0) {
2110         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2111         // } else {
2112         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2113         // //need the unicode buffer
2114         // if (withoutUnicodePtr == 0) {
2115         // //buffer all the entries that have been left aside....
2116         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2117         // System.arraycopy(
2118         // source,
2119         // startPosition,
2120         // withoutUnicodeBuffer,
2121         // 1,
2122         // withoutUnicodePtr);
2123         // }
2124         // //fill the buffer with the char
2125         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2126         // }
2127         // unicodeAsBackSlash = currentCharacter == '\\';
2128         // }
2129         /*
2130          * Tokenize a method body, assuming that curly brackets are properly balanced.
2131          */
2132         public final void jumpOverMethodBody() {
2133                 this.wasAcr = false;
2134                 int found = 1;
2135                 try {
2136                         while (true) { // loop for jumping over comments
2137                                 // ---------Consume white space and handles startPosition---------
2138                                 boolean isWhiteSpace;
2139                                 do {
2140                                         startPosition = currentPosition;
2141                                         currentCharacter = source[currentPosition++];
2142                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2143                                         // && (source[currentPosition] == 'u')) {
2144                                         // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2145                                         // } else {
2146                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2147                                                 pushLineSeparator();
2148                                         isWhiteSpace = Character.isWhitespace(currentCharacter);
2149                                         // }
2150                                 } while (isWhiteSpace);
2151                                 // -------consume token until } is found---------
2152                                 switch (currentCharacter) {
2153                                 case '{':
2154                                         found++;
2155                                         break;
2156                                 case '}':
2157                                         found--;
2158                                         if (found == 0)
2159                                                 return;
2160                                         break;
2161                                 case '\'': {
2162                                         boolean test;
2163                                         test = getNextChar('\\');
2164                                         if (test) {
2165                                                 try {
2166                                                         scanDoubleQuotedEscapeCharacter();
2167                                                 } catch (InvalidInputException ex) {
2168                                                 }
2169                                                 ;
2170                                         } else {
2171                                                 // try { // consume next character
2172                                                 unicodeAsBackSlash = false;
2173                                                 currentCharacter = source[currentPosition++];
2174                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2175                                                 // && (source[currentPosition] == 'u')) {
2176                                                 // getNextUnicodeChar();
2177                                                 // } else {
2178                                                 if (withoutUnicodePtr != 0) {
2179                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2180                                                 }
2181                                                 // }
2182                                                 // } catch (InvalidInputException ex) {
2183                                                 // };
2184                                         }
2185                                         getNextChar('\'');
2186                                         break;
2187                                 }
2188                                 case '"':
2189                                         try {
2190                                                 // try { // consume next character
2191                                                 unicodeAsBackSlash = false;
2192                                                 currentCharacter = source[currentPosition++];
2193                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2194                                                 // && (source[currentPosition] == 'u')) {
2195                                                 // getNextUnicodeChar();
2196                                                 // } else {
2197                                                 if (withoutUnicodePtr != 0) {
2198                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2199                                                 }
2200                                                 // }
2201                                                 // } catch (InvalidInputException ex) {
2202                                                 // };
2203                                                 while (currentCharacter != '"') {
2204                                                         if (currentCharacter == '\r') {
2205                                                                 if (source[currentPosition] == '\n')
2206                                                                         currentPosition++;
2207                                                                 break;
2208                                                                 // the string cannot go further that the line
2209                                                         }
2210                                                         if (currentCharacter == '\n') {
2211                                                                 break;
2212                                                                 // the string cannot go further that the line
2213                                                         }
2214                                                         if (currentCharacter == '\\') {
2215                                                                 try {
2216                                                                         scanDoubleQuotedEscapeCharacter();
2217                                                                 } catch (InvalidInputException ex) {
2218                                                                 }
2219                                                                 ;
2220                                                         }
2221                                                         // try { // consume next character
2222                                                         unicodeAsBackSlash = false;
2223                                                         currentCharacter = source[currentPosition++];
2224                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2225                                                         // && (source[currentPosition] == 'u')) {
2226                                                         // getNextUnicodeChar();
2227                                                         // } else {
2228                                                         if (withoutUnicodePtr != 0) {
2229                                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2230                                                         }
2231                                                         // }
2232                                                         // } catch (InvalidInputException ex) {
2233                                                         // };
2234                                                 }
2235                                         } catch (IndexOutOfBoundsException e) {
2236                                                 return;
2237                                         }
2238                                         break;
2239                                 case '/': {
2240                                         int test;
2241                                         if ((test = getNextChar('/', '*')) == 0) {
2242                                                 // line comment
2243                                                 try {
2244                                                         // get the next char
2245                                                         currentCharacter = source[currentPosition++];
2246                                                         // if (((currentCharacter = source[currentPosition++]) ==
2247                                                         // '\\')
2248                                                         // && (source[currentPosition] == 'u')) {
2249                                                         // //-------------unicode traitement ------------
2250                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2251                                                         // currentPosition++;
2252                                                         // while (source[currentPosition] == 'u') {
2253                                                         // currentPosition++;
2254                                                         // }
2255                                                         // if ((c1 =
2256                                                         // Character.getNumericValue(source[currentPosition++]))
2257                                                         // > 15
2258                                                         // || c1 < 0
2259                                                         // || (c2 =
2260                                                         // Character.getNumericValue(source[currentPosition++]))
2261                                                         // > 15
2262                                                         // || c2 < 0
2263                                                         // || (c3 =
2264                                                         // Character.getNumericValue(source[currentPosition++]))
2265                                                         // > 15
2266                                                         // || c3 < 0
2267                                                         // || (c4 =
2268                                                         // Character.getNumericValue(source[currentPosition++]))
2269                                                         // > 15
2270                                                         // || c4 < 0) {
2271                                                         // //error don't care of the value
2272                                                         // currentCharacter = 'A';
2273                                                         // } //something different from \n and \r
2274                                                         // else {
2275                                                         // currentCharacter =
2276                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2277                                                         // }
2278                                                         // }
2279                                                         while (currentCharacter != '\r' && currentCharacter != '\n') {
2280                                                                 // get the next char
2281                                                                 currentCharacter = source[currentPosition++];
2282                                                                 // if (((currentCharacter = source[currentPosition++])
2283                                                                 // == '\\')
2284                                                                 // && (source[currentPosition] == 'u')) {
2285                                                                 // //-------------unicode traitement ------------
2286                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2287                                                                 // currentPosition++;
2288                                                                 // while (source[currentPosition] == 'u') {
2289                                                                 // currentPosition++;
2290                                                                 // }
2291                                                                 // if ((c1 =
2292                                                                 // Character.getNumericValue(source[currentPosition++]))
2293                                                                 // > 15
2294                                                                 // || c1 < 0
2295                                                                 // || (c2 =
2296                                                                 // Character.getNumericValue(source[currentPosition++]))
2297                                                                 // > 15
2298                                                                 // || c2 < 0
2299                                                                 // || (c3 =
2300                                                                 // Character.getNumericValue(source[currentPosition++]))
2301                                                                 // > 15
2302                                                                 // || c3 < 0
2303                                                                 // || (c4 =
2304                                                                 // Character.getNumericValue(source[currentPosition++]))
2305                                                                 // > 15
2306                                                                 // || c4 < 0) {
2307                                                                 // //error don't care of the value
2308                                                                 // currentCharacter = 'A';
2309                                                                 // } //something different from \n and \r
2310                                                                 // else {
2311                                                                 // currentCharacter =
2312                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2313                                                                 // }
2314                                                                 // }
2315                                                         }
2316                                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2317                                                                 pushLineSeparator();
2318                                                 } catch (IndexOutOfBoundsException e) {
2319                                                 } // an eof will them be generated
2320                                                 break;
2321                                         }
2322                                         if (test > 0) {
2323                                                 // traditional and annotation comment
2324                                                 boolean star = false;
2325                                                 // try { // consume next character
2326                                                 unicodeAsBackSlash = false;
2327                                                 currentCharacter = source[currentPosition++];
2328                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2329                                                 // && (source[currentPosition] == 'u')) {
2330                                                 // getNextUnicodeChar();
2331                                                 // } else {
2332                                                 if (withoutUnicodePtr != 0) {
2333                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2334                                                 }
2335                                                 // };
2336                                                 // } catch (InvalidInputException ex) {
2337                                                 // };
2338                                                 if (currentCharacter == '*') {
2339                                                         star = true;
2340                                                 }
2341                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2342                                                         pushLineSeparator();
2343                                                 try { // get the next char
2344                                                         currentCharacter = source[currentPosition++];
2345                                                         // if (((currentCharacter = source[currentPosition++]) ==
2346                                                         // '\\')
2347                                                         // && (source[currentPosition] == 'u')) {
2348                                                         // //-------------unicode traitement ------------
2349                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2350                                                         // currentPosition++;
2351                                                         // while (source[currentPosition] == 'u') {
2352                                                         // currentPosition++;
2353                                                         // }
2354                                                         // if ((c1 =
2355                                                         // Character.getNumericValue(source[currentPosition++]))
2356                                                         // > 15
2357                                                         // || c1 < 0
2358                                                         // || (c2 =
2359                                                         // Character.getNumericValue(source[currentPosition++]))
2360                                                         // > 15
2361                                                         // || c2 < 0
2362                                                         // || (c3 =
2363                                                         // Character.getNumericValue(source[currentPosition++]))
2364                                                         // > 15
2365                                                         // || c3 < 0
2366                                                         // || (c4 =
2367                                                         // Character.getNumericValue(source[currentPosition++]))
2368                                                         // > 15
2369                                                         // || c4 < 0) {
2370                                                         // //error don't care of the value
2371                                                         // currentCharacter = 'A';
2372                                                         // } //something different from * and /
2373                                                         // else {
2374                                                         // currentCharacter =
2375                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2376                                                         // }
2377                                                         // }
2378                                                         // loop until end of comment */
2379                                                         while ((currentCharacter != '/') || (!star)) {
2380                                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2381                                                                         pushLineSeparator();
2382                                                                 star = currentCharacter == '*';
2383                                                                 // get next char
2384                                                                 currentCharacter = source[currentPosition++];
2385                                                                 // if (((currentCharacter = source[currentPosition++])
2386                                                                 // == '\\')
2387                                                                 // && (source[currentPosition] == 'u')) {
2388                                                                 // //-------------unicode traitement ------------
2389                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2390                                                                 // currentPosition++;
2391                                                                 // while (source[currentPosition] == 'u') {
2392                                                                 // currentPosition++;
2393                                                                 // }
2394                                                                 // if ((c1 =
2395                                                                 // Character.getNumericValue(source[currentPosition++]))
2396                                                                 // > 15
2397                                                                 // || c1 < 0
2398                                                                 // || (c2 =
2399                                                                 // Character.getNumericValue(source[currentPosition++]))
2400                                                                 // > 15
2401                                                                 // || c2 < 0
2402                                                                 // || (c3 =
2403                                                                 // Character.getNumericValue(source[currentPosition++]))
2404                                                                 // > 15
2405                                                                 // || c3 < 0
2406                                                                 // || (c4 =
2407                                                                 // Character.getNumericValue(source[currentPosition++]))
2408                                                                 // > 15
2409                                                                 // || c4 < 0) {
2410                                                                 // //error don't care of the value
2411                                                                 // currentCharacter = 'A';
2412                                                                 // } //something different from * and /
2413                                                                 // else {
2414                                                                 // currentCharacter =
2415                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2416                                                                 // }
2417                                                                 // }
2418                                                         }
2419                                                 } catch (IndexOutOfBoundsException e) {
2420                                                         return;
2421                                                 }
2422                                                 break;
2423                                         }
2424                                         break;
2425                                 }
2426                                 default:
2427                                         if (isPHPIdentOrVarStart(currentCharacter) ) {
2428                                                 try {
2429                                                         scanIdentifierOrKeyword((currentCharacter == '$'));
2430                                                 } catch (InvalidInputException ex) {
2431                                                 }
2432                                                 ;
2433                                                 break;
2434                                         }
2435                                 if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
2436 //                                      if (Character.isDigit(currentCharacter)) {
2437                                                 try {
2438                                                         scanNumber(false);
2439                                                 } catch (InvalidInputException ex) {
2440                                                 }
2441                                                 ;
2442                                                 break;
2443                                         }
2444                                 }
2445                         }
2446                         // -----------------end switch while try--------------------
2447                 } catch (IndexOutOfBoundsException e) {
2448                 } catch (InvalidInputException e) {
2449                 }
2450                 return;
2451         }
2452
2453         // public final boolean jumpOverUnicodeWhiteSpace()
2454         // throws InvalidInputException {
2455         // //BOOLEAN
2456         // //handle the case of unicode. Jump over the next whiteSpace
2457         // //making startPosition pointing on the next available char
2458         // //On false, the currentCharacter is filled up with a potential
2459         // //correct char
2460         //
2461         // try {
2462         // this.wasAcr = false;
2463         // int c1, c2, c3, c4;
2464         // int unicodeSize = 6;
2465         // currentPosition++;
2466         // while (source[currentPosition] == 'u') {
2467         // currentPosition++;
2468         // unicodeSize++;
2469         // }
2470         //
2471         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2472         // || c1 < 0)
2473         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2474         // || c2 < 0)
2475         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2476         // || c3 < 0)
2477         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2478         // || c4 < 0)) {
2479         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2480         // }
2481         //
2482         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2483         // if (recordLineSeparator
2484         // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2485         // pushLineSeparator();
2486         // if (Character.isWhitespace(currentCharacter))
2487         // return true;
2488         //
2489         // //buffer the new char which is not a white space
2490         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2491         // //withoutUnicodePtr == 1 is true here
2492         // return false;
2493         // } catch (IndexOutOfBoundsException e) {
2494         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2495         // }
2496         // }
2497         public final int[] getLineEnds() {
2498                 // return a bounded copy of this.lineEnds
2499                 int[] copy;
2500                 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2501                 return copy;
2502         }
2503
2504         public char[] getSource() {
2505                 return this.source;
2506         }
2507
2508         public static boolean isIdentifierOrKeyword(int token) {
2509                 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2510         }
2511
2512         final char[] optimizedCurrentTokenSource1() {
2513                 // return always the same char[] build only once
2514                 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2515                 char charOne = source[startPosition];
2516                 switch (charOne) {
2517                 case 'a':
2518                         return charArray_a;
2519                 case 'b':
2520                         return charArray_b;
2521                 case 'c':
2522                         return charArray_c;
2523                 case 'd':
2524                         return charArray_d;
2525                 case 'e':
2526                         return charArray_e;
2527                 case 'f':
2528                         return charArray_f;
2529                 case 'g':
2530                         return charArray_g;
2531                 case 'h':
2532                         return charArray_h;
2533                 case 'i':
2534                         return charArray_i;
2535                 case 'j':
2536                         return charArray_j;
2537                 case 'k':
2538                         return charArray_k;
2539                 case 'l':
2540                         return charArray_l;
2541                 case 'm':
2542                         return charArray_m;
2543                 case 'n':
2544                         return charArray_n;
2545                 case 'o':
2546                         return charArray_o;
2547                 case 'p':
2548                         return charArray_p;
2549                 case 'q':
2550                         return charArray_q;
2551                 case 'r':
2552                         return charArray_r;
2553                 case 's':
2554                         return charArray_s;
2555                 case 't':
2556                         return charArray_t;
2557                 case 'u':
2558                         return charArray_u;
2559                 case 'v':
2560                         return charArray_v;
2561                 case 'w':
2562                         return charArray_w;
2563                 case 'x':
2564                         return charArray_x;
2565                 case 'y':
2566                         return charArray_y;
2567                 case 'z':
2568                         return charArray_z;
2569                 default:
2570                         return new char[] { charOne };
2571                 }
2572         }
2573
2574         final char[] optimizedCurrentTokenSource2() {
2575                 char c0, c1;
2576                 c0 = source[startPosition];
2577                 c1 = source[startPosition + 1];
2578                 if (c0 == '$') {
2579                         // return always the same char[] build only once
2580                         // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2581                         switch (c1) {
2582                         case 'a':
2583                                 return charArray_va;
2584                         case 'b':
2585                                 return charArray_vb;
2586                         case 'c':
2587                                 return charArray_vc;
2588                         case 'd':
2589                                 return charArray_vd;
2590                         case 'e':
2591                                 return charArray_ve;
2592                         case 'f':
2593                                 return charArray_vf;
2594                         case 'g':
2595                                 return charArray_vg;
2596                         case 'h':
2597                                 return charArray_vh;
2598                         case 'i':
2599                                 return charArray_vi;
2600                         case 'j':
2601                                 return charArray_vj;
2602                         case 'k':
2603                                 return charArray_vk;
2604                         case 'l':
2605                                 return charArray_vl;
2606                         case 'm':
2607                                 return charArray_vm;
2608                         case 'n':
2609                                 return charArray_vn;
2610                         case 'o':
2611                                 return charArray_vo;
2612                         case 'p':
2613                                 return charArray_vp;
2614                         case 'q':
2615                                 return charArray_vq;
2616                         case 'r':
2617                                 return charArray_vr;
2618                         case 's':
2619                                 return charArray_vs;
2620                         case 't':
2621                                 return charArray_vt;
2622                         case 'u':
2623                                 return charArray_vu;
2624                         case 'v':
2625                                 return charArray_vv;
2626                         case 'w':
2627                                 return charArray_vw;
2628                         case 'x':
2629                                 return charArray_vx;
2630                         case 'y':
2631                                 return charArray_vy;
2632                         case 'z':
2633                                 return charArray_vz;
2634                         }
2635                 }
2636                 // try to return the same char[] build only once
2637                 int hash = ((c0 << 6) + c1) % TableSize;
2638                 char[][] table = charArray_length[0][hash];
2639                 int i = newEntry2;
2640                 while (++i < InternalTableSize) {
2641                         char[] charArray = table[i];
2642                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2643                                 return charArray;
2644                 }
2645                 // ---------other side---------
2646                 i = -1;
2647                 int max = newEntry2;
2648                 while (++i <= max) {
2649                         char[] charArray = table[i];
2650                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2651                                 return charArray;
2652                 }
2653                 // --------add the entry-------
2654                 if (++max >= InternalTableSize)
2655                         max = 0;
2656                 char[] r;
2657                 table[max] = (r = new char[] { c0, c1 });
2658                 newEntry2 = max;
2659                 return r;
2660         }
2661
2662         final char[] optimizedCurrentTokenSource3() {
2663                 // try to return the same char[] build only once
2664                 char c0, c1, c2;
2665                 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2666                                 % TableSize;
2667                 char[][] table = charArray_length[1][hash];
2668                 int i = newEntry3;
2669                 while (++i < InternalTableSize) {
2670                         char[] charArray = table[i];
2671                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2672                                 return charArray;
2673                 }
2674                 // ---------other side---------
2675                 i = -1;
2676                 int max = newEntry3;
2677                 while (++i <= max) {
2678                         char[] charArray = table[i];
2679                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2680                                 return charArray;
2681                 }
2682                 // --------add the entry-------
2683                 if (++max >= InternalTableSize)
2684                         max = 0;
2685                 char[] r;
2686                 table[max] = (r = new char[] { c0, c1, c2 });
2687                 newEntry3 = max;
2688                 return r;
2689         }
2690
2691         final char[] optimizedCurrentTokenSource4() {
2692                 // try to return the same char[] build only once
2693                 char c0, c1, c2, c3;
2694                 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2695                                 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2696                                 % TableSize;
2697                 char[][] table = charArray_length[2][(int) hash];
2698                 int i = newEntry4;
2699                 while (++i < InternalTableSize) {
2700                         char[] charArray = table[i];
2701                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2702                                 return charArray;
2703                 }
2704                 // ---------other side---------
2705                 i = -1;
2706                 int max = newEntry4;
2707                 while (++i <= max) {
2708                         char[] charArray = table[i];
2709                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2710                                 return charArray;
2711                 }
2712                 // --------add the entry-------
2713                 if (++max >= InternalTableSize)
2714                         max = 0;
2715                 char[] r;
2716                 table[max] = (r = new char[] { c0, c1, c2, c3 });
2717                 newEntry4 = max;
2718                 return r;
2719         }
2720
2721         final char[] optimizedCurrentTokenSource5() {
2722                 // try to return the same char[] build only once
2723                 char c0, c1, c2, c3, c4;
2724                 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2725                                 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2726                                 % TableSize;
2727                 char[][] table = charArray_length[3][(int) hash];
2728                 int i = newEntry5;
2729                 while (++i < InternalTableSize) {
2730                         char[] charArray = table[i];
2731                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2732                                 return charArray;
2733                 }
2734                 // ---------other side---------
2735                 i = -1;
2736                 int max = newEntry5;
2737                 while (++i <= max) {
2738                         char[] charArray = table[i];
2739                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2740                                 return charArray;
2741                 }
2742                 // --------add the entry-------
2743                 if (++max >= InternalTableSize)
2744                         max = 0;
2745                 char[] r;
2746                 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2747                 newEntry5 = max;
2748                 return r;
2749         }
2750
2751         final char[] optimizedCurrentTokenSource6() {
2752                 // try to return the same char[] build only once
2753                 char c0, c1, c2, c3, c4, c5;
2754                 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2755                                 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2756                                 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2757                                 % TableSize;
2758                 char[][] table = charArray_length[4][(int) hash];
2759                 int i = newEntry6;
2760                 while (++i < InternalTableSize) {
2761                         char[] charArray = table[i];
2762                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2763                                         && (c5 == charArray[5]))
2764                                 return charArray;
2765                 }
2766                 // ---------other side---------
2767                 i = -1;
2768                 int max = newEntry6;
2769                 while (++i <= max) {
2770                         char[] charArray = table[i];
2771                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2772                                         && (c5 == charArray[5]))
2773                                 return charArray;
2774                 }
2775                 // --------add the entry-------
2776                 if (++max >= InternalTableSize)
2777                         max = 0;
2778                 char[] r;
2779                 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2780                 newEntry6 = max;
2781                 return r;
2782         }
2783
2784         public final void pushLineSeparator() throws InvalidInputException {
2785                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2786                 final int INCREMENT = 250;
2787                 if (this.checkNonExternalizedStringLiterals) {
2788                         // reinitialize the current line for non externalize strings purpose
2789                         currentLine = null;
2790                 }
2791                 // currentCharacter is at position currentPosition-1
2792                 // cr 000D
2793                 if (currentCharacter == '\r') {
2794                         int separatorPos = currentPosition - 1;
2795                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2796                                 return;
2797                         // System.out.println("CR-" + separatorPos);
2798                         try {
2799                                 lineEnds[++linePtr] = separatorPos;
2800                         } catch (IndexOutOfBoundsException e) {
2801                                 // linePtr value is correct
2802                                 int oldLength = lineEnds.length;
2803                                 int[] old = lineEnds;
2804                                 lineEnds = new int[oldLength + INCREMENT];
2805                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2806                                 lineEnds[linePtr] = separatorPos;
2807                         }
2808                         // look-ahead for merged cr+lf
2809                         try {
2810                                 if (source[currentPosition] == '\n') {
2811                                         // System.out.println("look-ahead LF-" + currentPosition);
2812                                         lineEnds[linePtr] = currentPosition;
2813                                         currentPosition++;
2814                                         wasAcr = false;
2815                                 } else {
2816                                         wasAcr = true;
2817                                 }
2818                         } catch (IndexOutOfBoundsException e) {
2819                                 wasAcr = true;
2820                         }
2821                 } else {
2822                         // lf 000A
2823                         if (currentCharacter == '\n') {
2824                                 // must merge eventual cr followed by lf
2825                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2826                                         // System.out.println("merge LF-" + (currentPosition - 1));
2827                                         lineEnds[linePtr] = currentPosition - 1;
2828                                 } else {
2829                                         int separatorPos = currentPosition - 1;
2830                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2831                                                 return;
2832                                         // System.out.println("LF-" + separatorPos);
2833                                         try {
2834                                                 lineEnds[++linePtr] = separatorPos;
2835                                         } catch (IndexOutOfBoundsException e) {
2836                                                 // linePtr value is correct
2837                                                 int oldLength = lineEnds.length;
2838                                                 int[] old = lineEnds;
2839                                                 lineEnds = new int[oldLength + INCREMENT];
2840                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2841                                                 lineEnds[linePtr] = separatorPos;
2842                                         }
2843                                 }
2844                                 wasAcr = false;
2845                         }
2846                 }
2847         }
2848
2849         public final void pushUnicodeLineSeparator() {
2850                 // isUnicode means that the \r or \n has been read as a unicode character
2851                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2852                 final int INCREMENT = 250;
2853                 // currentCharacter is at position currentPosition-1
2854                 if (this.checkNonExternalizedStringLiterals) {
2855                         // reinitialize the current line for non externalize strings purpose
2856                         currentLine = null;
2857                 }
2858                 // cr 000D
2859                 if (currentCharacter == '\r') {
2860                         int separatorPos = currentPosition - 6;
2861                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2862                                 return;
2863                         // System.out.println("CR-" + separatorPos);
2864                         try {
2865                                 lineEnds[++linePtr] = separatorPos;
2866                         } catch (IndexOutOfBoundsException e) {
2867                                 // linePtr value is correct
2868                                 int oldLength = lineEnds.length;
2869                                 int[] old = lineEnds;
2870                                 lineEnds = new int[oldLength + INCREMENT];
2871                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2872                                 lineEnds[linePtr] = separatorPos;
2873                         }
2874                         // look-ahead for merged cr+lf
2875                         if (source[currentPosition] == '\n') {
2876                                 // System.out.println("look-ahead LF-" + currentPosition);
2877                                 lineEnds[linePtr] = currentPosition;
2878                                 currentPosition++;
2879                                 wasAcr = false;
2880                         } else {
2881                                 wasAcr = true;
2882                         }
2883                 } else {
2884                         // lf 000A
2885                         if (currentCharacter == '\n') {
2886                                 // must merge eventual cr followed by lf
2887                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2888                                         // System.out.println("merge LF-" + (currentPosition - 1));
2889                                         lineEnds[linePtr] = currentPosition - 6;
2890                                 } else {
2891                                         int separatorPos = currentPosition - 6;
2892                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2893                                                 return;
2894                                         // System.out.println("LF-" + separatorPos);
2895                                         try {
2896                                                 lineEnds[++linePtr] = separatorPos;
2897                                         } catch (IndexOutOfBoundsException e) {
2898                                                 // linePtr value is correct
2899                                                 int oldLength = lineEnds.length;
2900                                                 int[] old = lineEnds;
2901                                                 lineEnds = new int[oldLength + INCREMENT];
2902                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2903                                                 lineEnds[linePtr] = separatorPos;
2904                                         }
2905                                 }
2906                                 wasAcr = false;
2907                         }
2908                 }
2909         }
2910
2911         public void recordComment(int token) {
2912                 // compute position
2913                 int stopPosition = this.currentPosition;
2914                 switch (token) {
2915                 case TokenNameCOMMENT_LINE:
2916                         stopPosition = -this.lastCommentLinePosition;
2917                         break;
2918                 case TokenNameCOMMENT_BLOCK:
2919                         stopPosition = -this.currentPosition;
2920                         break;
2921                 }
2922
2923                 // a new comment is recorded
2924                 int length = this.commentStops.length;
2925                 if (++this.commentPtr >= length) {
2926                         System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2927                         // grows the positions buffers too
2928                         System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2929                 }
2930                 this.commentStops[this.commentPtr] = stopPosition;
2931                 this.commentStarts[this.commentPtr] = this.startPosition;
2932         }
2933
2934         // public final void recordComment(boolean isJavadoc) {
2935         // // a new annotation comment is recorded
2936         // try {
2937         // commentStops[++commentPtr] = isJavadoc
2938         // ? currentPosition
2939         // : -currentPosition;
2940         // } catch (IndexOutOfBoundsException e) {
2941         // int oldStackLength = commentStops.length;
2942         // int[] oldStack = commentStops;
2943         // commentStops = new int[oldStackLength + 30];
2944         // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2945         // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2946         // //grows the positions buffers too
2947         // int[] old = commentStarts;
2948         // commentStarts = new int[oldStackLength + 30];
2949         // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2950         // }
2951         // //the buffer is of a correct size here
2952         // commentStarts[commentPtr] = startPosition;
2953         // }
2954         public void resetTo(int begin, int end) {
2955                 // reset the scanner to a given position where it may rescan again
2956                 diet = false;
2957                 initialPosition = startPosition = currentPosition = begin;
2958                 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2959                 commentPtr = -1; // reset comment stack
2960         }
2961
2962         public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2963                 // the string with "\\u" is a legal string of two chars \ and u
2964                 // thus we use a direct access to the source (for regular cases).
2965                 // if (unicodeAsBackSlash) {
2966                 // // consume next character
2967                 // unicodeAsBackSlash = false;
2968                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2969                 // && (source[currentPosition] == 'u')) {
2970                 // getNextUnicodeChar();
2971                 // } else {
2972                 // if (withoutUnicodePtr != 0) {
2973                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2974                 // }
2975                 // }
2976                 // } else
2977                 currentCharacter = source[currentPosition++];
2978                 switch (currentCharacter) {
2979                 case '\'':
2980                         currentCharacter = '\'';
2981                         break;
2982                 case '\\':
2983                         currentCharacter = '\\';
2984                         break;
2985                 default:
2986                         currentCharacter = '\\';
2987                         currentPosition--;
2988                 }
2989         }
2990
2991         public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2992                 currentCharacter = source[currentPosition++];
2993                 switch (currentCharacter) {
2994                 // case 'b' :
2995                 // currentCharacter = '\b';
2996                 // break;
2997                 case 't':
2998                         currentCharacter = '\t';
2999                         break;
3000                 case 'n':
3001                         currentCharacter = '\n';
3002                         break;
3003                 // case 'f' :
3004                 // currentCharacter = '\f';
3005                 // break;
3006                 case 'r':
3007                         currentCharacter = '\r';
3008                         break;
3009                 case '\"':
3010                         currentCharacter = '\"';
3011                         break;
3012                 case '\'':
3013                         currentCharacter = '\'';
3014                         break;
3015                 case '\\':
3016                         currentCharacter = '\\';
3017                         break;
3018                 case '$':
3019                         currentCharacter = '$';
3020                         break;
3021                 default:
3022                         // -----------octal escape--------------
3023                         // OctalDigit
3024                         // OctalDigit OctalDigit
3025                         // ZeroToThree OctalDigit OctalDigit
3026                         int number = Character.getNumericValue(currentCharacter);
3027                         if (number >= 0 && number <= 7) {
3028                                 boolean zeroToThreeNot = number > 3;
3029                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3030                                         int digit = Character.getNumericValue(currentCharacter);
3031                                         if (digit >= 0 && digit <= 7) {
3032                                                 number = (number * 8) + digit;
3033                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3034                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3035                                                                 // Digit --> ignore last character
3036                                                                 currentPosition--;
3037                                                         } else {
3038                                                                 digit = Character.getNumericValue(currentCharacter);
3039                                                                 if (digit >= 0 && digit <= 7) {
3040                                                                         // has read \ZeroToThree OctalDigit OctalDigit
3041                                                                         number = (number * 8) + digit;
3042                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3043                                                                         // --> ignore last character
3044                                                                         currentPosition--;
3045                                                                 }
3046                                                         }
3047                                                 } else { // has read \OctalDigit NonDigit--> ignore last
3048                                                         // character
3049                                                         currentPosition--;
3050                                                 }
3051                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
3052                                                 // character
3053                                                 currentPosition--;
3054                                         }
3055                                 } else { // has read \OctalDigit --> ignore last character
3056                                         currentPosition--;
3057                                 }
3058                                 if (number > 255)
3059                                         throw new InvalidInputException(INVALID_ESCAPE);
3060                                 currentCharacter = (char) number;
3061                         }
3062                 // else
3063                 // throw new InvalidInputException(INVALID_ESCAPE);
3064                 }
3065         }
3066
3067         // public int scanIdentifierOrKeyword() throws InvalidInputException {
3068         // return scanIdentifierOrKeyword( false );
3069         // }
3070         public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3071                 // test keywords
3072                 // first dispatch on the first char.
3073                 // then the length. If there are several
3074                 // keywors with the same length AND the same first char, then do another
3075                 // disptach on the second char :-)...cool....but fast !
3076                 useAssertAsAnIndentifier = false;
3077                 while (getNextCharAsJavaIdentifierPart()) {
3078                 }
3079                 ;
3080                 if (isVariable) {
3081                         // if (new String(getCurrentTokenSource()).equals("$this")) {
3082                         // return TokenNamethis;
3083                         // }
3084                         return TokenNameVariable;
3085                 }
3086                 int index, length;
3087                 char[] data;
3088                 char firstLetter;
3089                 // if (withoutUnicodePtr == 0)
3090                 // quick test on length == 1 but not on length > 12 while most identifier
3091                 // have a length which is <= 12...but there are lots of identifier with
3092                 // only one char....
3093                 // {
3094                 if ((length = currentPosition - startPosition) == 1)
3095                         return TokenNameIdentifier;
3096                 // data = source;
3097                 data = new char[length];
3098                 index = startPosition;
3099                 for (int i = 0; i < length; i++) {
3100                         data[i] = Character.toLowerCase(source[index + i]);
3101                 }
3102                 index = 0;
3103                 // } else {
3104                 // if ((length = withoutUnicodePtr) == 1)
3105                 // return TokenNameIdentifier;
3106                 // // data = withoutUnicodeBuffer;
3107                 // data = new char[withoutUnicodeBuffer.length];
3108                 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3109                 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3110                 // }
3111                 // index = 1;
3112                 // }
3113                 firstLetter = data[index];
3114                 switch (firstLetter) {
3115                 case '_':
3116                         switch (length) {
3117                         case 8:
3118                                 // __FILE__
3119                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3120                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3121                                         return TokenNameFILE;
3122                                 index = 0; // __LINE__
3123                                 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3124                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3125                                         return TokenNameLINE;
3126                                 break;
3127                         case 9:
3128                                 // __CLASS__
3129                                 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3130                                                 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3131                                         return TokenNameCLASS_C;
3132                                 break;
3133                         case 11:
3134                                 // __METHOD__
3135                                 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3136                                                 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3137                                                 && (data[++index] == '_'))
3138                                         return TokenNameMETHOD_C;
3139                                 break;
3140                         case 12:
3141                                 // __FUNCTION__
3142                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3143                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3144                                                 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3145                                         return TokenNameFUNC_C;
3146                                 break;
3147                         }
3148                         return TokenNameIdentifier;
3149                 case 'a':
3150                         // as and array abstract
3151                         switch (length) {
3152                         case 2:
3153                                 // as
3154                                 if ((data[++index] == 's')) {
3155                                         return TokenNameas;
3156                                 } else {
3157                                         return TokenNameIdentifier;
3158                                 }
3159                         case 3:
3160                                 // and
3161                                 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3162                                         return TokenNameand;
3163                                 } else {
3164                                         return TokenNameIdentifier;
3165                                 }
3166                         case 5:
3167                                 // array
3168                                 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3169                                         return TokenNamearray;
3170                                 else
3171                                         return TokenNameIdentifier;
3172                         case 8:
3173                                 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3174                                                 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3175                                         return TokenNameabstract;
3176                                 else
3177                                         return TokenNameIdentifier;
3178                         default:
3179                                 return TokenNameIdentifier;
3180                         }
3181                 case 'b':
3182                         // break
3183                         switch (length) {
3184                         case 5:
3185                                 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3186                                         return TokenNamebreak;
3187                                 else
3188                                         return TokenNameIdentifier;
3189                         default:
3190                                 return TokenNameIdentifier;
3191                         }
3192                 case 'c':
3193                         // case catch class clone const continue
3194                         switch (length) {
3195                         case 4:
3196                                 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3197                                         return TokenNamecase;
3198                                 else
3199                                         return TokenNameIdentifier;
3200                         case 5:
3201                                 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3202                                         return TokenNamecatch;
3203                                 index = 0;
3204                                 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3205                                         return TokenNameclass;
3206                                 index = 0;
3207                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3208                                         return TokenNameclone;
3209                                 index = 0;
3210                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3211                                         return TokenNameconst;
3212                                 else
3213                                         return TokenNameIdentifier;
3214                         case 8:
3215                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3216                                                 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3217                                         return TokenNamecontinue;
3218                                 else
3219                                         return TokenNameIdentifier;
3220                         default:
3221                                 return TokenNameIdentifier;
3222                         }
3223                 case 'd':
3224                         // declare default do die
3225                         // TODO delete define ==> no keyword !
3226                         switch (length) {
3227                         case 2:
3228                                 if ((data[++index] == 'o'))
3229                                         return TokenNamedo;
3230                                 else
3231                                         return TokenNameIdentifier;
3232                         // case 6 :
3233                         // if ((data[++index] == 'e')
3234                         // && (data[++index] == 'f')
3235                         // && (data[++index] == 'i')
3236                         // && (data[++index] == 'n')
3237                         // && (data[++index] == 'e'))
3238                         // return TokenNamedefine;
3239                         // else
3240                         // return TokenNameIdentifier;
3241                         case 7:
3242                                 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3243                                                 && (data[++index] == 'r') && (data[++index] == 'e'))
3244                                         return TokenNamedeclare;
3245                                 index = 0;
3246                                 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3247                                                 && (data[++index] == 'l') && (data[++index] == 't'))
3248                                         return TokenNamedefault;
3249                                 else
3250                                         return TokenNameIdentifier;
3251                         default:
3252                                 return TokenNameIdentifier;
3253                         }
3254                 case 'e':
3255                         // echo else exit elseif extends eval
3256                         switch (length) {
3257                         case 4:
3258                                 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3259                                         return TokenNameecho;
3260                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3261                                         return TokenNameelse;
3262                                 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3263                                         return TokenNameexit;
3264                                 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3265                                         return TokenNameeval;
3266                                 else
3267                                         return TokenNameIdentifier;
3268                         case 5:
3269                                 // endif empty
3270                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3271                                         return TokenNameendif;
3272                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3273                                         return TokenNameempty;
3274                                 else
3275                                         return TokenNameIdentifier;
3276                         case 6:
3277                                 // endfor
3278                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3279                                                 && (data[++index] == 'r'))
3280                                         return TokenNameendfor;
3281                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3282                                                 && (data[++index] == 'f'))
3283                                         return TokenNameelseif;
3284                                 else
3285                                         return TokenNameIdentifier;
3286                         case 7:
3287                                 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3288                                                 && (data[++index] == 'd') && (data[++index] == 's'))
3289                                         return TokenNameextends;
3290                                 else
3291                                         return TokenNameIdentifier;
3292                         case 8:
3293                                 // endwhile
3294                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3295                                                 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3296                                         return TokenNameendwhile;
3297                                 else
3298                                         return TokenNameIdentifier;
3299                         case 9:
3300                                 // endswitch
3301                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3302                                                 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3303                                         return TokenNameendswitch;
3304                                 else
3305                                         return TokenNameIdentifier;
3306                         case 10:
3307                                 // enddeclare
3308                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3309                                                 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3310                                                 && (data[++index] == 'e'))
3311                                         return TokenNameenddeclare;
3312                                 index = 0;
3313                                 if ((data[++index] == 'n') // endforeach
3314                                                 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3315                                                 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3316                                         return TokenNameendforeach;
3317                                 else
3318                                         return TokenNameIdentifier;
3319                         default:
3320                                 return TokenNameIdentifier;
3321                         }
3322                 case 'f':
3323                         // for false final function
3324                         switch (length) {
3325                         case 3:
3326                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3327                                         return TokenNamefor;
3328                                 else
3329                                         return TokenNameIdentifier;
3330                         case 5:
3331                                 // if ((data[++index] == 'a') && (data[++index] == 'l')
3332                                 // && (data[++index] == 's') && (data[++index] == 'e'))
3333                                 // return TokenNamefalse;
3334                                 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3335                                         return TokenNamefinal;
3336                                 else
3337                                         return TokenNameIdentifier;
3338                         case 7:
3339                                 // foreach
3340                                 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3341                                                 && (data[++index] == 'c') && (data[++index] == 'h'))
3342                                         return TokenNameforeach;
3343                                 else
3344                                         return TokenNameIdentifier;
3345                         case 8:
3346                                 // function
3347                                 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3348                                                 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3349                                         return TokenNamefunction;
3350                                 else
3351                                         return TokenNameIdentifier;
3352                         default:
3353                                 return TokenNameIdentifier;
3354                         }
3355                 case 'g':
3356                         // global
3357                         if (length == 6) {
3358                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3359                                                 && (data[++index] == 'l')) {
3360                                         return TokenNameglobal;
3361                                 }
3362                         }
3363                         return TokenNameIdentifier;
3364                 case 'i':
3365                         // if int isset include include_once instanceof interface implements
3366                         switch (length) {
3367                         case 2:
3368                                 if (data[++index] == 'f')
3369                                         return TokenNameif;
3370                                 else
3371                                         return TokenNameIdentifier;
3372                         // case 3 :
3373                         // if ((data[++index] == 'n') && (data[++index] == 't'))
3374                         // return TokenNameint;
3375                         // else
3376                         // return TokenNameIdentifier;
3377                         case 5:
3378                                 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3379                                         return TokenNameisset;
3380                                 else
3381                                         return TokenNameIdentifier;
3382                         case 7:
3383                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3384                                                 && (data[++index] == 'd') && (data[++index] == 'e'))
3385                                         return TokenNameinclude;
3386                                 else
3387                                         return TokenNameIdentifier;
3388                         case 9:
3389                                 // interface
3390                                 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3391                                                 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3392                                         return TokenNameinterface;
3393                                 else
3394                                         return TokenNameIdentifier;
3395                         case 10:
3396                                 // instanceof
3397                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3398                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3399                                                 && (data[++index] == 'f'))
3400                                         return TokenNameinstanceof;
3401                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3402                                                 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3403                                                 && (data[++index] == 's'))
3404                                         return TokenNameimplements;
3405                                 else
3406                                         return TokenNameIdentifier;
3407                         case 12:
3408                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3409                                                 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3410                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3411                                         return TokenNameinclude_once;
3412                                 else
3413                                         return TokenNameIdentifier;
3414                         default:
3415                                 return TokenNameIdentifier;
3416                         }
3417                 case 'l':
3418                         // list
3419                         if (length == 4) {
3420                                 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3421                                         return TokenNamelist;
3422                                 }
3423                         }
3424                         return TokenNameIdentifier;
3425                 case 'n':
3426                         // new null
3427                         switch (length) {
3428                         case 3:
3429                                 if ((data[++index] == 'e') && (data[++index] == 'w'))
3430                                         return TokenNamenew;
3431                                 else
3432                                         return TokenNameIdentifier;
3433                         // case 4 :
3434                         // if ((data[++index] == 'u') && (data[++index] == 'l')
3435                         // && (data[++index] == 'l'))
3436                         // return TokenNamenull;
3437                         // else
3438                         // return TokenNameIdentifier;
3439                         default:
3440                                 return TokenNameIdentifier;
3441                         }
3442                 case 'o':
3443                         // or old_function
3444                         if (length == 2) {
3445                                 if (data[++index] == 'r') {
3446                                         return TokenNameor;
3447                                 }
3448                         }
3449                         // if (length == 12) {
3450                         // if ((data[++index] == 'l')
3451                         // && (data[++index] == 'd')
3452                         // && (data[++index] == '_')
3453                         // && (data[++index] == 'f')
3454                         // && (data[++index] == 'u')
3455                         // && (data[++index] == 'n')
3456                         // && (data[++index] == 'c')
3457                         // && (data[++index] == 't')
3458                         // && (data[++index] == 'i')
3459                         // && (data[++index] == 'o')
3460                         // && (data[++index] == 'n')) {
3461                         // return TokenNameold_function;
3462                         // }
3463                         // }
3464                         return TokenNameIdentifier;
3465                 case 'p':
3466                         // print public private protected
3467                         switch (length) {
3468                         case 5:
3469                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3470                                         return TokenNameprint;
3471                                 } else
3472                                         return TokenNameIdentifier;
3473                         case 6:
3474                                 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3475                                                 && (data[++index] == 'c')) {
3476                                         return TokenNamepublic;
3477                                 } else
3478                                         return TokenNameIdentifier;
3479                         case 7:
3480                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3481                                                 && (data[++index] == 't') && (data[++index] == 'e')) {
3482                                         return TokenNameprivate;
3483                                 } else
3484                                         return TokenNameIdentifier;
3485                         case 9:
3486                                 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3487                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3488                                         return TokenNameprotected;
3489                                 } else
3490                                         return TokenNameIdentifier;
3491                         }
3492                         return TokenNameIdentifier;
3493                 case 'r':
3494                         // return require require_once
3495                         if (length == 6) {
3496                                 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3497                                                 && (data[++index] == 'n')) {
3498                                         return TokenNamereturn;
3499                                 }
3500                         } else if (length == 7) {
3501                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3502                                                 && (data[++index] == 'r') && (data[++index] == 'e')) {
3503                                         return TokenNamerequire;
3504                                 }
3505                         } else if (length == 12) {
3506                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3507                                                 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3508                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3509                                         return TokenNamerequire_once;
3510                                 }
3511                         } else
3512                                 return TokenNameIdentifier;
3513                 case 's':
3514                         // static switch
3515                         switch (length) {
3516                         case 6:
3517                                 if (data[++index] == 't')
3518                                         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3519                                                 return TokenNamestatic;
3520                                         } else
3521                                                 return TokenNameIdentifier;
3522                                 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3523                                                 && (data[++index] == 'h'))
3524                                         return TokenNameswitch;
3525                                 else
3526                                         return TokenNameIdentifier;
3527                         default:
3528                                 return TokenNameIdentifier;
3529                         }
3530                 case 't':
3531                         // try true throw
3532                         switch (length) {
3533                         case 3:
3534                                 if ((data[++index] == 'r') && (data[++index] == 'y'))
3535                                         return TokenNametry;
3536                                 else
3537                                         return TokenNameIdentifier;
3538                         // case 4 :
3539                         // if ((data[++index] == 'r') && (data[++index] == 'u')
3540                         // && (data[++index] == 'e'))
3541                         // return TokenNametrue;
3542                         // else
3543                         // return TokenNameIdentifier;
3544                         case 5:
3545                                 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3546                                         return TokenNamethrow;
3547                                 else
3548                                         return TokenNameIdentifier;
3549                         default:
3550                                 return TokenNameIdentifier;
3551                         }
3552                 case 'u':
3553                         // use unset
3554                         switch (length) {
3555                         case 3:
3556                                 if ((data[++index] == 's') && (data[++index] == 'e'))
3557                                         return TokenNameuse;
3558                                 else
3559                                         return TokenNameIdentifier;
3560                         case 5:
3561                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3562                                         return TokenNameunset;
3563                                 else
3564                                         return TokenNameIdentifier;
3565                         default:
3566                                 return TokenNameIdentifier;
3567                         }
3568                 case 'v':
3569                         // var
3570                         switch (length) {
3571                         case 3:
3572                                 if ((data[++index] == 'a') && (data[++index] == 'r'))
3573                                         return TokenNamevar;
3574                                 else
3575                                         return TokenNameIdentifier;
3576                         default:
3577                                 return TokenNameIdentifier;
3578                         }
3579                 case 'w':
3580                         // while
3581                         switch (length) {
3582                         case 5:
3583                                 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3584                                         return TokenNamewhile;
3585                                 else
3586                                         return TokenNameIdentifier;
3587                         // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3588                         // (data[++index]=='e') && (data[++index]=='f')&&
3589                         // (data[++index]=='p'))
3590                         // return TokenNamewidefp ;
3591                         // else
3592                         // return TokenNameIdentifier;
3593                         default:
3594                                 return TokenNameIdentifier;
3595                         }
3596                 case 'x':
3597                         // xor
3598                         switch (length) {
3599                         case 3:
3600                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3601                                         return TokenNamexor;
3602                                 else
3603                                         return TokenNameIdentifier;
3604                         default:
3605                                 return TokenNameIdentifier;
3606                         }
3607                 default:
3608                         return TokenNameIdentifier;
3609                 }
3610         }
3611
3612         public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3613                 // when entering this method the currentCharacter is the firt
3614                 // digit of the number , i.e. it may be preceeded by a . when
3615                 // dotPrefix is true
3616                 boolean floating = dotPrefix;
3617                 if ((!dotPrefix) && (currentCharacter == '0')) {
3618                         if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3619                                 // force the first char of the hexa number do exist...
3620                                 // consume next character
3621                                 unicodeAsBackSlash = false;
3622                                 currentCharacter = source[currentPosition++];
3623                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3624                                 // && (source[currentPosition] == 'u')) {
3625                                 // getNextUnicodeChar();
3626                                 // } else {
3627                                 // if (withoutUnicodePtr != 0) {
3628                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3629                                 // }
3630                                 // }
3631                                 if (Character.digit(currentCharacter, 16) == -1)
3632                                         throw new InvalidInputException(INVALID_HEXA);
3633                                 // ---end forcing--
3634                                 while (getNextCharAsDigit(16)) {
3635                                 }
3636                                 ;
3637                                 // if (getNextChar('l', 'L') >= 0)
3638                                 // return TokenNameLongLiteral;
3639                                 // else
3640                                 return TokenNameIntegerLiteral;
3641                         }
3642                         // there is x or X in the number
3643                         // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3644                         // 00078.0 is true !!!!! crazy language
3645                         if (getNextCharAsDigit()) {
3646                                 // -------------potential octal-----------------
3647                                 while (getNextCharAsDigit()) {
3648                                 }
3649                                 ;
3650                                 // if (getNextChar('l', 'L') >= 0) {
3651                                 // return TokenNameLongLiteral;
3652                                 // }
3653                                 //
3654                                 // if (getNextChar('f', 'F') >= 0) {
3655                                 // return TokenNameFloatingPointLiteral;
3656                                 // }
3657                                 if (getNextChar('d', 'D') >= 0) {
3658                                         return TokenNameDoubleLiteral;
3659                                 } else { // make the distinction between octal and float ....
3660                                         if (getNextChar('.')) { // bingo ! ....
3661                                                 while (getNextCharAsDigit()) {
3662                                                 }
3663                                                 ;
3664                                                 if (getNextChar('e', 'E') >= 0) {
3665                                                         // consume next character
3666                                                         unicodeAsBackSlash = false;
3667                                                         currentCharacter = source[currentPosition++];
3668                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3669                                                         // && (source[currentPosition] == 'u')) {
3670                                                         // getNextUnicodeChar();
3671                                                         // } else {
3672                                                         // if (withoutUnicodePtr != 0) {
3673                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3674                                                         // }
3675                                                         // }
3676                                                         if ((currentCharacter == '-') || (currentCharacter == '+')) {
3677                                                                 // consume next character
3678                                                                 unicodeAsBackSlash = false;
3679                                                                 currentCharacter = source[currentPosition++];
3680                                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3681                                                                 // && (source[currentPosition] == 'u')) {
3682                                                                 // getNextUnicodeChar();
3683                                                                 // } else {
3684                                                                 // if (withoutUnicodePtr != 0) {
3685                                                                 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3686                                                                 // currentCharacter;
3687                                                                 // }
3688                                                                 // }
3689                                                         }
3690                                                         if (!Character.isDigit(currentCharacter))
3691                                                                 throw new InvalidInputException(INVALID_FLOAT);
3692                                                         while (getNextCharAsDigit()) {
3693                                                         }
3694                                                         ;
3695                                                 }
3696                                                 // if (getNextChar('f', 'F') >= 0)
3697                                                 // return TokenNameFloatingPointLiteral;
3698                                                 getNextChar('d', 'D'); // jump over potential d or D
3699                                                 return TokenNameDoubleLiteral;
3700                                         } else {
3701                                                 return TokenNameIntegerLiteral;
3702                                         }
3703                                 }
3704                         } else {
3705                                 /* carry on */
3706                         }
3707                 }
3708                 while (getNextCharAsDigit()) {
3709                 }
3710                 ;
3711                 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3712                 // return TokenNameLongLiteral;
3713                 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3714                         while (getNextCharAsDigit()) {
3715                         }
3716                         ;
3717                         floating = true;
3718                 }
3719                 // if floating is true both exponant and suffix may be optional
3720                 if (getNextChar('e', 'E') >= 0) {
3721                         floating = true;
3722                         // consume next character
3723                         unicodeAsBackSlash = false;
3724                         currentCharacter = source[currentPosition++];
3725                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3726                         // && (source[currentPosition] == 'u')) {
3727                         // getNextUnicodeChar();
3728                         // } else {
3729                         // if (withoutUnicodePtr != 0) {
3730                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3731                         // }
3732                         // }
3733                         if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3734                                 // next
3735                                 // character
3736                                 unicodeAsBackSlash = false;
3737                                 currentCharacter = source[currentPosition++];
3738                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3739                                 // && (source[currentPosition] == 'u')) {
3740                                 // getNextUnicodeChar();
3741                                 // } else {
3742                                 // if (withoutUnicodePtr != 0) {
3743                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3744                                 // }
3745                                 // }
3746                         }
3747                         if (!Character.isDigit(currentCharacter))
3748                                 throw new InvalidInputException(INVALID_FLOAT);
3749                         while (getNextCharAsDigit()) {
3750                         }
3751                         ;
3752                 }
3753                 if (getNextChar('d', 'D') >= 0)
3754                         return TokenNameDoubleLiteral;
3755                 // if (getNextChar('f', 'F') >= 0)
3756                 // return TokenNameFloatingPointLiteral;
3757                 // the long flag has been tested before
3758                 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3759         }
3760
3761         /**
3762          * Search the line number corresponding to a specific position
3763          *
3764          */
3765         public final int getLineNumber(int position) {
3766                 if (lineEnds == null)
3767                         return 1;
3768                 int length = linePtr + 1;
3769                 if (length == 0)
3770                         return 1;
3771                 int g = 0, d = length - 1;
3772                 int m = 0;
3773                 while (g <= d) {
3774                         m = (g + d) / 2;
3775                         if (position < lineEnds[m]) {
3776                                 d = m - 1;
3777                         } else if (position > lineEnds[m]) {
3778                                 g = m + 1;
3779                         } else {
3780                                 return m + 1;
3781                         }
3782                 }
3783                 if (position < lineEnds[m]) {
3784                         return m + 1;
3785                 }
3786                 return m + 2;
3787         }
3788
3789         public void setPHPMode(boolean mode) {
3790                 phpMode = mode;
3791         }
3792
3793         public final void setSource(char[] source) {
3794                 setSource(null, source);
3795         }
3796
3797         public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3798                 // the source-buffer is set to sourceString
3799                 this.compilationUnit = compilationUnit;
3800                 if (source == null) {
3801                         this.source = new char[0];
3802                 } else {
3803                         this.source = source;
3804                 }
3805                 startPosition = -1;
3806                 initialPosition = currentPosition = 0;
3807                 containsAssertKeyword = false;
3808                 withoutUnicodeBuffer = new char[this.source.length];
3809                 // encapsedStringStack = new Stack();
3810         }
3811
3812         public String toString() {
3813                 if (startPosition == source.length)
3814                         return "EOF\n\n" + new String(source); //$NON-NLS-1$
3815                 if (currentPosition > source.length)
3816                         return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3817                 char front[] = new char[startPosition];
3818                 System.arraycopy(source, 0, front, 0, startPosition);
3819                 int middleLength = (currentPosition - 1) - startPosition + 1;
3820                 char middle[];
3821                 if (middleLength > -1) {
3822                         middle = new char[middleLength];
3823                         System.arraycopy(source, startPosition, middle, 0, middleLength);
3824                 } else {
3825                         middle = new char[0];
3826                 }
3827                 char end[] = new char[source.length - (currentPosition - 1)];
3828                 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3829                 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3830                                 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3831                                 + new String(end);
3832         }
3833
3834         public final String toStringAction(int act) {
3835                 switch (act) {
3836                 case TokenNameERROR:
3837                         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3838                 // //$NON-NLS-1$
3839                 case TokenNameINLINE_HTML:
3840                         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3841                 case TokenNameIdentifier:
3842                         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3843                 case TokenNameVariable:
3844                         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3845                 case TokenNameabstract:
3846                         return "abstract"; //$NON-NLS-1$
3847                 case TokenNameand:
3848                         return "AND"; //$NON-NLS-1$
3849                 case TokenNamearray:
3850                         return "array"; //$NON-NLS-1$
3851                 case TokenNameas:
3852                         return "as"; //$NON-NLS-1$
3853                 case TokenNamebreak:
3854                         return "break"; //$NON-NLS-1$
3855                 case TokenNamecase:
3856                         return "case"; //$NON-NLS-1$
3857                 case TokenNameclass:
3858                         return "class"; //$NON-NLS-1$
3859                 case TokenNamecatch:
3860                         return "catch"; //$NON-NLS-1$
3861                 case TokenNameclone:
3862                         //$NON-NLS-1$
3863                         return "clone";
3864                 case TokenNameconst:
3865                         //$NON-NLS-1$
3866                         return "const";
3867                 case TokenNamecontinue:
3868                         return "continue"; //$NON-NLS-1$
3869                 case TokenNamedefault:
3870                         return "default"; //$NON-NLS-1$
3871                 // case TokenNamedefine :
3872                 // return "define"; //$NON-NLS-1$
3873                 case TokenNamedo:
3874                         return "do"; //$NON-NLS-1$
3875                 case TokenNameecho:
3876                         return "echo"; //$NON-NLS-1$
3877                 case TokenNameelse:
3878                         return "else"; //$NON-NLS-1$
3879                 case TokenNameelseif:
3880                         return "elseif"; //$NON-NLS-1$
3881                 case TokenNameendfor:
3882                         return "endfor"; //$NON-NLS-1$
3883                 case TokenNameendforeach:
3884                         return "endforeach"; //$NON-NLS-1$
3885                 case TokenNameendif:
3886                         return "endif"; //$NON-NLS-1$
3887                 case TokenNameendswitch:
3888                         return "endswitch"; //$NON-NLS-1$
3889                 case TokenNameendwhile:
3890                         return "endwhile"; //$NON-NLS-1$
3891                 case TokenNameexit:
3892                         return "exit";
3893                 case TokenNameextends:
3894                         return "extends"; //$NON-NLS-1$
3895                 // case TokenNamefalse :
3896                 // return "false"; //$NON-NLS-1$
3897                 case TokenNamefinal:
3898                         return "final"; //$NON-NLS-1$
3899                 case TokenNamefor:
3900                         return "for"; //$NON-NLS-1$
3901                 case TokenNameforeach:
3902                         return "foreach"; //$NON-NLS-1$
3903                 case TokenNamefunction:
3904                         return "function"; //$NON-NLS-1$
3905                 case TokenNameglobal:
3906                         return "global"; //$NON-NLS-1$
3907                 case TokenNameif:
3908                         return "if"; //$NON-NLS-1$
3909                 case TokenNameimplements:
3910                         return "implements"; //$NON-NLS-1$
3911                 case TokenNameinclude:
3912                         return "include"; //$NON-NLS-1$
3913                 case TokenNameinclude_once:
3914                         return "include_once"; //$NON-NLS-1$
3915                 case TokenNameinstanceof:
3916                         return "instanceof"; //$NON-NLS-1$
3917                 case TokenNameinterface:
3918                         return "interface"; //$NON-NLS-1$
3919                 case TokenNameisset:
3920                         return "isset"; //$NON-NLS-1$
3921                 case TokenNamelist:
3922                         return "list"; //$NON-NLS-1$
3923                 case TokenNamenew:
3924                         return "new"; //$NON-NLS-1$
3925                 // case TokenNamenull :
3926                 // return "null"; //$NON-NLS-1$
3927                 case TokenNameor:
3928                         return "OR"; //$NON-NLS-1$
3929                 case TokenNameprint:
3930                         return "print"; //$NON-NLS-1$
3931                 case TokenNameprivate:
3932                         return "private"; //$NON-NLS-1$
3933                 case TokenNameprotected:
3934                         return "protected"; //$NON-NLS-1$
3935                 case TokenNamepublic:
3936                         return "public"; //$NON-NLS-1$
3937                 case TokenNamerequire:
3938                         return "require"; //$NON-NLS-1$
3939                 case TokenNamerequire_once:
3940                         return "require_once"; //$NON-NLS-1$
3941                 case TokenNamereturn:
3942                         return "return"; //$NON-NLS-1$
3943                 case TokenNamestatic:
3944                         return "static"; //$NON-NLS-1$
3945                 case TokenNameswitch:
3946                         return "switch"; //$NON-NLS-1$
3947                 // case TokenNametrue :
3948                 // return "true"; //$NON-NLS-1$
3949                 case TokenNameunset:
3950                         return "unset"; //$NON-NLS-1$
3951                 case TokenNamevar:
3952                         return "var"; //$NON-NLS-1$
3953                 case TokenNamewhile:
3954                         return "while"; //$NON-NLS-1$
3955                 case TokenNamexor:
3956                         return "XOR"; //$NON-NLS-1$
3957                 // case TokenNamethis :
3958                 // return "$this"; //$NON-NLS-1$
3959                 case TokenNameIntegerLiteral:
3960                         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3961                 case TokenNameDoubleLiteral:
3962                         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3963                 case TokenNameStringDoubleQuote:
3964                         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3965                 case TokenNameStringSingleQuote:
3966                         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3967                 case TokenNameStringInterpolated:
3968                         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3969                 case TokenNameEncapsedString0:
3970                         return "`"; //$NON-NLS-1$
3971                         // case TokenNameEncapsedString1:
3972                         // return "\'"; //$NON-NLS-1$
3973                         // case TokenNameEncapsedString2:
3974                         // return "\""; //$NON-NLS-1$
3975                 case TokenNameSTRING:
3976                         return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3977                 case TokenNameHEREDOC:
3978                         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3979                 case TokenNamePLUS_PLUS:
3980                         return "++"; //$NON-NLS-1$
3981                 case TokenNameMINUS_MINUS:
3982                         return "--"; //$NON-NLS-1$
3983                 case TokenNameEQUAL_EQUAL:
3984                         return "=="; //$NON-NLS-1$
3985                 case TokenNameEQUAL_EQUAL_EQUAL:
3986                         return "==="; //$NON-NLS-1$
3987                 case TokenNameEQUAL_GREATER:
3988                         return "=>"; //$NON-NLS-1$
3989                 case TokenNameLESS_EQUAL:
3990                         return "<="; //$NON-NLS-1$
3991                 case TokenNameGREATER_EQUAL:
3992                         return ">="; //$NON-NLS-1$
3993                 case TokenNameNOT_EQUAL:
3994                         return "!="; //$NON-NLS-1$
3995                 case TokenNameNOT_EQUAL_EQUAL:
3996                         return "!=="; //$NON-NLS-1$
3997                 case TokenNameLEFT_SHIFT:
3998                         return "<<"; //$NON-NLS-1$
3999                 case TokenNameRIGHT_SHIFT:
4000                         return ">>"; //$NON-NLS-1$
4001                 case TokenNamePLUS_EQUAL:
4002                         return "+="; //$NON-NLS-1$
4003                 case TokenNameMINUS_EQUAL:
4004                         return "-="; //$NON-NLS-1$
4005                 case TokenNameMULTIPLY_EQUAL:
4006                         return "*="; //$NON-NLS-1$
4007                 case TokenNameDIVIDE_EQUAL:
4008                         return "/="; //$NON-NLS-1$
4009                 case TokenNameAND_EQUAL:
4010                         return "&="; //$NON-NLS-1$
4011                 case TokenNameOR_EQUAL:
4012                         return "|="; //$NON-NLS-1$
4013                 case TokenNameXOR_EQUAL:
4014                         return "^="; //$NON-NLS-1$
4015                 case TokenNameREMAINDER_EQUAL:
4016                         return "%="; //$NON-NLS-1$
4017                 case TokenNameDOT_EQUAL:
4018                         return ".="; //$NON-NLS-1$
4019                 case TokenNameLEFT_SHIFT_EQUAL:
4020                         return "<<="; //$NON-NLS-1$
4021                 case TokenNameRIGHT_SHIFT_EQUAL:
4022                         return ">>="; //$NON-NLS-1$
4023                 case TokenNameOR_OR:
4024                         return "||"; //$NON-NLS-1$
4025                 case TokenNameAND_AND:
4026                         return "&&"; //$NON-NLS-1$
4027                 case TokenNamePLUS:
4028                         return "+"; //$NON-NLS-1$
4029                 case TokenNameMINUS:
4030                         return "-"; //$NON-NLS-1$
4031                 case TokenNameMINUS_GREATER:
4032                         return "->";
4033                 case TokenNameNOT:
4034                         return "!"; //$NON-NLS-1$
4035                 case TokenNameREMAINDER:
4036                         return "%"; //$NON-NLS-1$
4037                 case TokenNameXOR:
4038                         return "^"; //$NON-NLS-1$
4039                 case TokenNameAND:
4040                         return "&"; //$NON-NLS-1$
4041                 case TokenNameMULTIPLY:
4042                         return "*"; //$NON-NLS-1$
4043                 case TokenNameOR:
4044                         return "|"; //$NON-NLS-1$
4045                 case TokenNameTWIDDLE:
4046                         return "~"; //$NON-NLS-1$
4047                 case TokenNameTWIDDLE_EQUAL:
4048                         return "~="; //$NON-NLS-1$
4049                 case TokenNameDIVIDE:
4050                         return "/"; //$NON-NLS-1$
4051                 case TokenNameGREATER:
4052                         return ">"; //$NON-NLS-1$
4053                 case TokenNameLESS:
4054                         return "<"; //$NON-NLS-1$
4055                 case TokenNameLPAREN:
4056                         return "("; //$NON-NLS-1$
4057                 case TokenNameRPAREN:
4058                         return ")"; //$NON-NLS-1$
4059                 case TokenNameLBRACE:
4060                         return "{"; //$NON-NLS-1$
4061                 case TokenNameRBRACE:
4062                         return "}"; //$NON-NLS-1$
4063                 case TokenNameLBRACKET:
4064                         return "["; //$NON-NLS-1$
4065                 case TokenNameRBRACKET:
4066                         return "]"; //$NON-NLS-1$
4067                 case TokenNameSEMICOLON:
4068                         return ";"; //$NON-NLS-1$
4069                 case TokenNameQUESTION:
4070                         return "?"; //$NON-NLS-1$
4071                 case TokenNameCOLON:
4072                         return ":"; //$NON-NLS-1$
4073                 case TokenNameCOMMA:
4074                         return ","; //$NON-NLS-1$
4075                 case TokenNameDOT:
4076                         return "."; //$NON-NLS-1$
4077                 case TokenNameEQUAL:
4078                         return "="; //$NON-NLS-1$
4079                 case TokenNameAT:
4080                         return "@";
4081                 case TokenNameDOLLAR:
4082                         return "$";
4083                 case TokenNameDOLLAR_LBRACE:
4084                         return "${";
4085                 case TokenNameLBRACE_DOLLAR:
4086                         return "{$";
4087                 case TokenNameEOF:
4088                         return "EOF"; //$NON-NLS-1$
4089                 case TokenNameWHITESPACE:
4090                         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4091                 case TokenNameCOMMENT_LINE:
4092                         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4093                 case TokenNameCOMMENT_BLOCK:
4094                         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4095                 case TokenNameCOMMENT_PHPDOC:
4096                         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4097                 // case TokenNameHTML :
4098                 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4099                 // //$NON-NLS-1$
4100                 case TokenNameFILE:
4101                         return "__FILE__"; //$NON-NLS-1$
4102                 case TokenNameLINE:
4103                         return "__LINE__"; //$NON-NLS-1$
4104                 case TokenNameCLASS_C:
4105                         return "__CLASS__"; //$NON-NLS-1$
4106                 case TokenNameMETHOD_C:
4107                         return "__METHOD__"; //$NON-NLS-1$
4108                 case TokenNameFUNC_C:
4109                         return "__FUNCTION__"; //$NON-NLS-1
4110                 case TokenNameboolCAST:
4111                         return "( bool )"; //$NON-NLS-1$
4112                 case TokenNameintCAST:
4113                         return "( int )"; //$NON-NLS-1$
4114                 case TokenNamedoubleCAST:
4115                         return "( double )"; //$NON-NLS-1$
4116                 case TokenNameobjectCAST:
4117                         return "( object )"; //$NON-NLS-1$
4118                 case TokenNamestringCAST:
4119                         return "( string )"; //$NON-NLS-1$
4120                 default:
4121                         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4122                 }
4123         }
4124
/**
 * Creates a scanner by delegating to {@link #Scanner(boolean, boolean)} with
 * both flags set to <code>false</code>.
 */
public Scanner() {
        this(false, // tokenizeComments
                        false); // tokenizeWhiteSpace
}
4128
/**
 * Creates a scanner by delegating to
 * {@link #Scanner(boolean, boolean, boolean)} with
 * <code>checkNonExternalizedStringLiterals</code> set to <code>false</code>.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
        this(tokenizeComments, tokenizeWhiteSpace,
                        false); // checkNonExternalizedStringLiterals
}
4132
/**
 * Creates a scanner by delegating to
 * {@link #Scanner(boolean, boolean, boolean, boolean)} with
 * <code>assertMode</code> set to <code>false</code>.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 * @param checkNonExternalizedStringLiterals
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
        this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals,
                        false); // assertMode
}
4136
/**
 * Creates a scanner by delegating to the eight-argument constructor, supplying
 * <code>false</code> for <code>tokenizeStrings</code>, <code>null</code> for
 * both task arrays and <code>true</code> for <code>isTaskCaseSensitive</code>.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 * @param checkNonExternalizedStringLiterals
 *          forwarded unchanged
 * @param assertMode
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
                boolean assertMode) {
        this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode,
                        false, // tokenizeStrings
                        null, // taskTags
                        null, // taskPriorities
                        true); // isTaskCaseSensitive
}
4141
/**
 * Creates a scanner and stores the given options in the fields of the same
 * name. The end-of-file position starts at <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments
 *          stored in <code>this.tokenizeComments</code>
 * @param tokenizeWhiteSpace
 *          stored in <code>this.tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals
 *          stored in <code>this.checkNonExternalizedStringLiterals</code>
 * @param assertMode
 *          accepted for signature compatibility only; the matching field is
 *          disabled in this class, so the value is not stored
 * @param tokenizeStrings
 *          stored in <code>this.tokenizeStrings</code>
 * @param taskTags
 *          task tags to look for in comments, may be <code>null</code>
 * @param taskPriorities
 *          priorities matching <code>taskTags</code> by index, may be
 *          <code>null</code>
 * @param isTaskCaseSensitive
 *          <code>true</code> if task tags must match the comment text exactly,
 *          <code>false</code> to also accept a match that differs only in case
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
                boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
        this.eofPosition = Integer.MAX_VALUE;
        this.tokenizeComments = tokenizeComments;
        this.tokenizeWhiteSpace = tokenizeWhiteSpace;
        this.tokenizeStrings = tokenizeStrings;
        this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
        this.taskTags = taskTags;
        this.taskPriorities = taskPriorities;
        // Previously dropped: checkTaskTag() reads this flag, so the caller's
        // choice has to be stored for it to have any effect.
        this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4154
4155         private void checkNonExternalizeString() throws InvalidInputException {
4156                 if (currentLine == null)
4157                         return;
4158                 parseTags(currentLine);
4159         }
4160
4161         private void parseTags(NLSLine line) throws InvalidInputException {
4162                 String s = new String(getCurrentTokenSource());
4163                 int pos = s.indexOf(TAG_PREFIX);
4164                 int lineLength = line.size();
4165                 while (pos != -1) {
4166                         int start = pos + TAG_PREFIX_LENGTH;
4167                         int end = s.indexOf(TAG_POSTFIX, start);
4168                         String index = s.substring(start, end);
4169                         int i = 0;
4170                         try {
4171                                 i = Integer.parseInt(index) - 1;
4172                                 // Tags are one based not zero based.
4173                         } catch (NumberFormatException e) {
4174                                 i = -1; // we don't want to consider this as a valid NLS tag
4175                         }
4176                         if (line.exists(i)) {
4177                                 line.set(i, null);
4178                         }
4179                         pos = s.indexOf(TAG_PREFIX, start);
4180                 }
4181                 this.nonNLSStrings = new StringLiteral[lineLength];
4182                 int nonNLSCounter = 0;
4183                 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4184                         StringLiteral literal = (StringLiteral) iterator.next();
4185                         if (literal != null) {
4186                                 this.nonNLSStrings[nonNLSCounter++] = literal;
4187                         }
4188                 }
4189                 if (nonNLSCounter == 0) {
4190                         this.nonNLSStrings = null;
4191                         currentLine = null;
4192                         return;
4193                 }
4194                 this.wasNonExternalizedStringLiteral = true;
4195                 if (nonNLSCounter != lineLength) {
4196                         System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4197                 }
4198                 currentLine = null;
4199         }
4200
4201         public final void scanEscapeCharacter() throws InvalidInputException {
4202                 // the string with "\\u" is a legal string of two chars \ and u
4203                 // thus we use a direct access to the source (for regular cases).
4204                 if (unicodeAsBackSlash) {
4205                         // consume next character
4206                         unicodeAsBackSlash = false;
4207                         // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4208                         // (source[currentPosition] == 'u')) {
4209                         // getNextUnicodeChar();
4210                         // } else {
4211                         if (withoutUnicodePtr != 0) {
4212                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4213                                 // }
4214                         }
4215                 } else
4216                         currentCharacter = source[currentPosition++];
4217                 switch (currentCharacter) {
4218                 case 'b':
4219                         currentCharacter = '\b';
4220                         break;
4221                 case 't':
4222                         currentCharacter = '\t';
4223                         break;
4224                 case 'n':
4225                         currentCharacter = '\n';
4226                         break;
4227                 case 'f':
4228                         currentCharacter = '\f';
4229                         break;
4230                 case 'r':
4231                         currentCharacter = '\r';
4232                         break;
4233                 case '\"':
4234                         currentCharacter = '\"';
4235                         break;
4236                 case '\'':
4237                         currentCharacter = '\'';
4238                         break;
4239                 case '\\':
4240                         currentCharacter = '\\';
4241                         break;
4242                 default:
4243                         // -----------octal escape--------------
4244                         // OctalDigit
4245                         // OctalDigit OctalDigit
4246                         // ZeroToThree OctalDigit OctalDigit
4247                         int number = Character.getNumericValue(currentCharacter);
4248                         if (number >= 0 && number <= 7) {
4249                                 boolean zeroToThreeNot = number > 3;
4250                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4251                                         int digit = Character.getNumericValue(currentCharacter);
4252                                         if (digit >= 0 && digit <= 7) {
4253                                                 number = (number * 8) + digit;
4254                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4255                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4256                                                                 // Digit --> ignore last character
4257                                                                 currentPosition--;
4258                                                         } else {
4259                                                                 digit = Character.getNumericValue(currentCharacter);
4260                                                                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4261                                                                         // OctalDigit OctalDigit
4262                                                                         number = (number * 8) + digit;
4263                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4264                                                                         // --> ignore last character
4265                                                                         currentPosition--;
4266                                                                 }
4267                                                         }
4268                                                 } else { // has read \OctalDigit NonDigit--> ignore last
4269                                                         // character
4270                                                         currentPosition--;
4271                                                 }
4272                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
4273                                                 // character
4274                                                 currentPosition--;
4275                                         }
4276                                 } else { // has read \OctalDigit --> ignore last character
4277                                         currentPosition--;
4278                                 }
4279                                 if (number > 255)
4280                                         throw new InvalidInputException(INVALID_ESCAPE);
4281                                 currentCharacter = (char) number;
4282                         } else
4283                                 throw new InvalidInputException(INVALID_ESCAPE);
4284                 }
4285         }
4286
4287         // check presence of task: tags
4288         // TODO (frederic) see if we need to take unicode characters into account...
4289         public void checkTaskTag(int commentStart, int commentEnd) {
4290                 char[] src = this.source;
4291
4292                 // only look for newer task: tags
4293                 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4294                         return;
4295                 }
4296                 int foundTaskIndex = this.foundTaskCount;
4297                 char previous = src[commentStart + 1]; // should be '*' or '/'
4298                 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4299                         char[] tag = null;
4300                         char[] priority = null;
4301                         // check for tag occurrence only if not ambiguous with javadoc tag
4302                         if (previous != '@') {
4303                                 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4304                                         tag = this.taskTags[itag];
4305                                         int tagLength = tag.length;
4306                                         if (tagLength == 0)
4307                                                 continue nextTag;
4308
4309                                         // ensure tag is not leaded with letter if tag starts with a letter
4310                                         if (Scanner.isPHPIdentifierStart(tag[0])) {
4311                                                 if (Scanner.isPHPIdentifierPart(previous)) {
4312                                                         continue nextTag;
4313                                                 }
4314                                         }
4315
4316                                         for (int t = 0; t < tagLength; t++) {
4317                                                 char sc, tc;
4318                                                 int x = i + t;
4319                                                 if (x >= this.eofPosition || x >= commentEnd)
4320                                                         continue nextTag;
4321                                                 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4322                                                         if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4323                                                                                                                                                                                                                                                                                                                                                                                                                                         // insensitive
4324                                                                                                                                                                                                                                                                                                                                                                                                                                         // check
4325                                                                 continue nextTag;
4326                                                         }
4327                                                 }
4328                                         }
4329                                         // ensure tag is not followed with letter if tag finishes with a
4330                                         // letter
4331                                         if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4332                                                 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4333                                                         continue nextTag;
4334                                         }
4335                                         if (this.foundTaskTags == null) {
4336                                                 this.foundTaskTags = new char[5][];
4337                                                 this.foundTaskMessages = new char[5][];
4338                                                 this.foundTaskPriorities = new char[5][];
4339                                                 this.foundTaskPositions = new int[5][];
4340                                         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4341                                                 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4342                                                                 this.foundTaskCount);
4343                                                 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4344                                                                 this.foundTaskCount);
4345                                                 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4346                                                                 this.foundTaskCount);
4347                                                 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4348                                                                 this.foundTaskCount);
4349                                         }
4350
4351                                         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4352
4353                                         this.foundTaskTags[this.foundTaskCount] = tag;
4354                                         this.foundTaskPriorities[this.foundTaskCount] = priority;
4355                                         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4356                                         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4357                                         this.foundTaskCount++;
4358                                         i += tagLength - 1; // will be incremented when looping
4359                                         break nextTag;
4360                                 }
4361                         }
4362                         previous = src[i];
4363                 }
4364                 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4365                         // retrieve message start and end positions
4366                         int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4367                         int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4368                         // at most beginning of next task
4369                         if (max_value < msgStart) {
4370                                 max_value = msgStart; // would only occur if tag is before EOF.
4371                         }
4372                         int end = -1;
4373                         char c;
4374                         for (int j = msgStart; j < max_value; j++) {
4375                                 if ((c = src[j]) == '\n' || c == '\r') {
4376                                         end = j - 1;
4377                                         break;
4378                                 }
4379                         }
4380                         if (end == -1) {
4381                                 for (int j = max_value; j > msgStart; j--) {
4382                                         if ((c = src[j]) == '*') {
4383                                                 end = j - 1;
4384                                                 break;
4385                                         }
4386                                 }
4387                                 if (end == -1)
4388                                         end = max_value;
4389                         }
4390                         if (msgStart == end)
4391                                 continue; // empty
4392                         // trim the message
4393                         while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4394                                 end--;
4395                         while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4396                                 msgStart++;
4397                         // update the end position of the task
4398                         this.foundTaskPositions[i][1] = end;
4399                         // get the message source
4400                         final int messageLength = end - msgStart + 1;
4401                         char[] message = new char[messageLength];
4402                         System.arraycopy(src, msgStart, message, 0, messageLength);
4403                         this.foundTaskMessages[i] = message;
4404                 }
4405         }
4406
4407         // check presence of task: tags
4408         // public void checkTaskTag(int commentStart, int commentEnd) {
4409         // // only look for newer task: tags
4410         // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4411         // - 1][0] >= commentStart) {
4412         // return;
4413         // }
4414         // int foundTaskIndex = this.foundTaskCount;
4415         // nextChar: for (int i = commentStart; i < commentEnd && i <
4416         // this.eofPosition; i++) {
4417         // char[] tag = null;
4418         // char[] priority = null;
4419         // // check for tag occurrence
4420         // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4421         // tag = this.taskTags[itag];
4422         // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4423         // ? this.taskPriorities[itag] : null;
4424         // int tagLength = tag.length;
4425         // for (int t = 0; t < tagLength; t++) {
4426         // if (this.source[i + t] != tag[t])
4427         // continue nextTag;
4428         // }
4429         // if (this.foundTaskTags == null) {
4430         // this.foundTaskTags = new char[5][];
4431         // this.foundTaskMessages = new char[5][];
4432         // this.foundTaskPriorities = new char[5][];
4433         // this.foundTaskPositions = new int[5][];
4434         // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4435         // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4436         // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4437         // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4438         // char[this.foundTaskCount * 2][], 0,
4439         // this.foundTaskCount);
4440         // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4441         // new char[this.foundTaskCount * 2][], 0,
4442         // this.foundTaskCount);
4443         // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4444         // int[this.foundTaskCount * 2][], 0,
4445         // this.foundTaskCount);
4446         // }
4447         // this.foundTaskTags[this.foundTaskCount] = tag;
4448         // this.foundTaskPriorities[this.foundTaskCount] = priority;
4449         // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4450         // - 1 };
4451         // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4452         // this.foundTaskCount++;
4453         // i += tagLength - 1; // will be incremented when looping
4454         // }
4455         // }
4456         // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4457         // // retrieve message start and end positions
4458         // int msgStart = this.foundTaskPositions[i][0] +
4459         // this.foundTaskTags[i].length;
4460         // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4461         // 1][0] - 1 : commentEnd - 1;
4462         // // at most beginning of next task
4463         // if (max_value < msgStart)
4464         // max_value = msgStart; // would only occur if tag is before EOF.
4465         // int end = -1;
4466         // char c;
4467         // for (int j = msgStart; j < max_value; j++) {
4468         // if ((c = this.source[j]) == '\n' || c == '\r') {
4469         // end = j - 1;
4470         // break;
4471         // }
4472         // }
4473         // if (end == -1) {
4474         // for (int j = max_value; j > msgStart; j--) {
4475         // if ((c = this.source[j]) == '*') {
4476         // end = j - 1;
4477         // break;
4478         // }
4479         // }
4480         // if (end == -1)
4481         // end = max_value;
4482         // }
4483         // if (msgStart == end)
4484         // continue; // empty
4485         // // trim the message
4486         // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4487         // end--;
4488         // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4489         // msgStart++;
4490         // // update the end position of the task
4491         // this.foundTaskPositions[i][1] = end;
4492         // // get the message source
4493         // final int messageLength = end - msgStart + 1;
4494         // char[] message = new char[messageLength];
4495         // System.arraycopy(source, msgStart, message, 0, messageLength);
4496         // this.foundTaskMessages[i] = message;
4497         // }
4498         // }
4499 }