1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.codeassist.complete;
14 * Scanner aware of a cursor location so as to discard trailing portions of identifiers
15 * containing the cursor location.
17 * Cursor location denotes the position of the last character behind which completion is requested:
19 * -1 means completion at the very beginning of the source
20 * 0 means completion behind the first character
21 * n means completion behind the n-th character
23 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
24 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
// Scanner subclass that tracks a cursor location and captures the identifier prefix to complete.
26 public class CompletionScanner extends Scanner {
// Identifier prefix captured by getCurrentIdentifierSource(); stays null until one has been captured.
28 public char[] completionIdentifier;
// Position of the last character behind which completion is requested (-1 = very beginning of the source).
29 public int cursorLocation;
31 /* Source positions of the completed identifier.
32 * If the cursor is inside an actual identifier, the end is that identifier's
33 * end, i.e. beyond the cursor location.
// Start position of the completed identifier; set to startPosition when an identifier is captured.
35 public int completedIdentifierStart = 0;
// End position of the completed identifier; start - 1 for the fake empty identifier, so that end < start.
36 public int completedIdentifierEnd = -1;
// Zero-length array returned (and stored in completionIdentifier) when a fake empty identifier is issued.
38 public static final char[] EmptyCompletionIdentifier = {};
// Creates a completion scanner: passes false for the first three superclass flags and forwards assertMode.
// NOTE(review): the meaning of the three false flags is defined by Scanner's constructor, which is
// not visible here - confirm against that class.
39 public CompletionScanner(boolean assertMode) {
40 super(false, false, false, assertMode);
43 * Truncate the current identifier if it contains the cursor location, since completion is performed
44 * on an identifier prefix.
// Returns the source of the current identifier.
// While no completion identifier has been captured yet, a token that contains or abuts the cursor
// is cut at the cursor, stored in completionIdentifier and returned; its positions are recorded in
// completedIdentifierStart/End. In every other case the superclass implementation is used.
47 public char[] getCurrentIdentifierSource() {
// only the first matching identifier is captured; later calls skip this whole section
49 if (completionIdentifier == null){
50 if (cursorLocation < startPosition && currentPosition == startPosition){ // fake empty identifier got issued
51 // remember actual identifier positions
52 completedIdentifierStart = startPosition;
// end = start - 1 encodes a zero-length range
53 completedIdentifierEnd = completedIdentifierStart - 1;
54 return completionIdentifier = EmptyCompletionIdentifier;
// cursor is at most one position before the token start and strictly before its end
56 if (cursorLocation+1 >= startPosition && cursorLocation < currentPosition){
57 // remember actual identifier positions
58 completedIdentifierStart = startPosition;
// the recorded end is the end of the whole token, which may lie beyond the cursor
59 completedIdentifierEnd = currentPosition - 1;
60 if (withoutUnicodePtr != 0){ // check unicode scenario
// copy the withoutUnicodePtr decoded characters, which start at index 1 of the buffer
61 System.arraycopy(withoutUnicodeBuffer, 1, completionIdentifier = new char[withoutUnicodePtr], 0, withoutUnicodePtr);
// plain source: keep the characters from startPosition up to and including cursorLocation
63 int length = cursorLocation + 1 - startPosition;
64 // no char[] sharing around completionIdentifier, we want it to be unique so as to use identity checks
65 System.arraycopy(source, startPosition, (completionIdentifier = new char[length]), 0, length);
67 return completionIdentifier;
70 return super.getCurrentIdentifierSource();
73 * Identifier splitting for unicode escapes.
74 * Only store the current unicode character if we did not pass the cursorLocation.
75 * Note: this does not handle cases where the cursor is in the middle of a unicode escape
// Reads the next character, decoding a backslash-u escape when one is present, and checks that it
// is a Java identifier part. Every rejection path visible here restores currentPosition to its
// value on entry. Decoded characters are appended to withoutUnicodeBuffer only while the cursor has
// not been passed (or once a completion identifier has already been captured).
// Throws InvalidCursorLocation when the cursor lies strictly inside a unicode escape.
// NOTE(review): the return statements are not visible in this view - presumably false on the
// rewind paths and true otherwise; confirm against the full source.
77 public boolean getNextCharAsJavaIdentifierPart() {
// entry position, used to rewind on every failure path
79 int temp = currentPosition;
81 if (((currentCharacter = source[currentPosition++]) == '\\')
82 && (source[currentPosition] == 'u')) {
83 //-------------unicode handling ------------
// the escape may contain a run of several 'u' characters
87 while (source[currentPosition] == 'u') {
// decode four digits; a digit whose numeric value is out of range rewinds to the entry position
92 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
94 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
95 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
96 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
97 currentPosition = temp;
// assemble the character from its four base-16 digits
101 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
102 if (!Character.isJavaIdentifierPart(currentCharacter)) {
103 currentPosition = temp;
107 //need the unicode buffer
108 if (withoutUnicodePtr == 0) {
109 //buffer all the entries that have been left aside....
110 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
114 withoutUnicodeBuffer,
// cursor strictly between the backslash and the last digit of the escape: completion is refused
118 if (temp < cursorLocation && cursorLocation < currentPosition-1){
119 throw new InvalidCursorLocation(InvalidCursorLocation.NO_COMPLETION_INSIDE_UNICODE);
121 // store the current unicode, only if we did not pass the cursorLocation
122 // Note: this does not handle cases where the cursor is in the middle of a unicode
123 if ((completionIdentifier != null)
124 || (startPosition <= cursorLocation+1 && cursorLocation >= currentPosition-1)){
125 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
128 } //-------------end unicode handling--------------
// plain (non-escaped) character
130 if (!Character.isJavaIdentifierPart(currentCharacter)) {
131 currentPosition = temp;
// the buffer is only maintained once an escape has been met earlier in this token
135 if (withoutUnicodePtr != 0){
136 // store the current unicode, only if we did not pass the cursorLocation
137 // Note: this does not handle cases where the cursor is in the middle of a unicode
138 if ((completionIdentifier != null)
139 || (startPosition <= cursorLocation+1 && cursorLocation >= currentPosition-1)){
140 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran past the end of source while reading: rewind to the entry position
145 } catch (IndexOutOfBoundsException e) {
146 currentPosition = temp;
// Scans and returns the next token, adding completion-specific behavior to the regular scanning:
//  - returns TokenNameIdentifier with currentPosition reset to startPosition when the cursor sits
//    in the blanks just skipped or right at the end of the source, so that an empty identifier
//    can be completed there;
//  - returns TokenNameDOT alone when the cursor is on a dot that would otherwise start a number;
//  - throws InvalidCursorLocation when the cursor lies inside a string literal or a comment.
// Throws InvalidInputException on malformed input (invalid character constant, invalid character
// in a string, unterminated string or comment, invalid unicode escape, "Ctrl-Z").
// NOTE(review): case labels, break statements and several short statements of this method are not
// visible in this view; the comments below only describe what the visible lines establish.
150 public int getNextToken() throws InvalidInputException {
// NOTE(review): the condition guarding this early exit is not visible in this view
154 jumpOverMethodBody();
156 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
159 while (true) { //loop for jumping over comments
160 withoutUnicodePtr = 0;
161 //start with a new token (even comment written with unicode )
163 // ---------Consume white space and handles startPosition---------
164 int whiteStart = currentPosition;
165 boolean isWhiteSpace;
167 startPosition = currentPosition;
168 if (((currentCharacter = source[currentPosition++]) == '\\')
169 && (source[currentPosition] == 'u')) {
170 isWhiteSpace = jumpOverUnicodeWhiteSpace();
172 if (recordLineSeparator
173 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
176 (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
178 /* completion requesting strictly inside blanks */
// nothing captured yet, the cursor lies between whiteStart - 1 and startPosition - 1, and the
// character just read cannot start an identifier: rewind and report an identifier, which
// getCurrentIdentifierSource() will treat as the fake empty one
179 if ((whiteStart != currentPosition)
180 //&& (previousToken == TokenNameDOT)
181 && (completionIdentifier == null)
182 && (whiteStart <= cursorLocation+1)
183 && (cursorLocation < startPosition)
184 && !Character.isJavaIdentifierStart(currentCharacter)){
185 currentPosition = startPosition; // for next token read
186 return TokenNameIdentifier;
188 } while (isWhiteSpace);
189 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
190 // reposition scanner in case we are interested by spaces as tokens
192 startPosition = whiteStart;
193 return TokenNameWHITESPACE;
195 //little trick to get out in the middle of a source computation
196 if (currentPosition > eofPosition){
197 /* might be completing at eof (e.g. behind a dot) */
198 if (completionIdentifier == null &&
199 startPosition == cursorLocation + 1){
200 currentPosition = startPosition; // for being detected as empty free identifier
201 return TokenNameIdentifier;
206 // ---------Identify the next token-------------
208 switch (currentCharacter) {
210 return TokenNameLPAREN;
212 return TokenNameRPAREN;
214 return TokenNameLBRACE;
216 return TokenNameRBRACE;
218 return TokenNameLBRACKET;
220 return TokenNameRBRACKET;
222 return TokenNameSEMICOLON;
224 return TokenNameCOMMA;
// cursor sits on the dot itself: report the dot alone instead of scanning a number from it
226 if (startPosition <= cursorLocation
227 && cursorLocation < currentPosition){
228 return TokenNameDOT; // completion inside .<|>12
230 if (getNextCharAsDigit())
231 return scanNumber(true);
236 if ((test = getNextChar('+', '=')) == 0)
237 return TokenNamePLUS_PLUS;
239 return TokenNamePLUS_EQUAL;
240 return TokenNamePLUS;
245 if ((test = getNextChar('-', '=')) == 0)
246 return TokenNameMINUS_MINUS;
248 return TokenNameMINUS_EQUAL;
249 return TokenNameMINUS;
252 return TokenNameTWIDDLE;
254 if (getNextChar('='))
255 return TokenNameNOT_EQUAL;
258 if (getNextChar('='))
259 return TokenNameMULTIPLY_EQUAL;
260 return TokenNameMULTIPLY;
262 if (getNextChar('='))
263 return TokenNameREMAINDER_EQUAL;
264 return TokenNameREMAINDER;
268 if ((test = getNextChar('=', '<')) == 0)
269 return TokenNameLESS_EQUAL;
271 if (getNextChar('='))
272 return TokenNameLEFT_SHIFT_EQUAL;
273 return TokenNameLEFT_SHIFT;
275 return TokenNameLESS;
280 if ((test = getNextChar('=', '>')) == 0)
281 return TokenNameGREATER_EQUAL;
283 if ((test = getNextChar('=', '>')) == 0)
284 return TokenNameRIGHT_SHIFT_EQUAL;
286 if (getNextChar('='))
287 return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
288 return TokenNameUNSIGNED_RIGHT_SHIFT;
290 return TokenNameRIGHT_SHIFT;
292 return TokenNameGREATER;
295 if (getNextChar('='))
296 return TokenNameEQUAL_EQUAL;
297 return TokenNameEQUAL;
301 if ((test = getNextChar('&', '=')) == 0)
302 return TokenNameAND_AND;
304 return TokenNameAND_EQUAL;
310 if ((test = getNextChar('|', '=')) == 0)
311 return TokenNameOR_OR;
313 return TokenNameOR_EQUAL;
317 if (getNextChar('='))
318 return TokenNameXOR_EQUAL;
321 return TokenNameQUESTION;
323 return TokenNameCOLON;
327 if ((test = getNextChar('\n', '\r')) == 0) {
328 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
331 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
332 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
333 if (currentPosition + lookAhead == source.length)
335 if (source[currentPosition + lookAhead] == '\n')
337 if (source[currentPosition + lookAhead] == '\'') {
338 currentPosition += lookAhead + 1;
342 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
345 if (getNextChar('\'')) {
346 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
347 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
348 if (currentPosition + lookAhead == source.length)
350 if (source[currentPosition + lookAhead] == '\n')
352 if (source[currentPosition + lookAhead] == '\'') {
353 currentPosition += lookAhead + 1;
357 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
359 if (getNextChar('\\'))
360 scanEscapeCharacter();
361 else { // consume next character
362 unicodeAsBackSlash = false;
363 if (((currentCharacter = source[currentPosition++]) == '\\')
364 && (source[currentPosition] == 'u')) {
365 getNextUnicodeChar();
367 if (withoutUnicodePtr != 0) {
368 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
372 if (getNextChar('\''))
373 return TokenNameCharacterLiteral;
374 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
375 for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
376 if (currentPosition + lookAhead == source.length)
378 if (source[currentPosition + lookAhead] == '\n')
380 if (source[currentPosition + lookAhead] == '\'') {
381 currentPosition += lookAhead + 1;
385 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
388 // consume next character
389 unicodeAsBackSlash = false;
390 if (((currentCharacter = source[currentPosition++]) == '\\')
391 && (source[currentPosition] == 'u')) {
392 getNextUnicodeChar();
394 if (withoutUnicodePtr != 0) {
395 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
399 while (currentCharacter != '"') {
400 /**** \r and \n are not valid in string literals ****/
401 if ((currentCharacter == '\n') || (currentCharacter == '\r')) {
402 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
403 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
404 if (currentPosition + lookAhead == source.length)
406 if (source[currentPosition + lookAhead] == '\n')
408 if (source[currentPosition + lookAhead] == '\"') {
409 currentPosition += lookAhead + 1;
413 throw new InvalidInputException(INVALID_CHAR_IN_STRING);
415 if (currentCharacter == '\\') {
416 int escapeSize = currentPosition;
417 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
418 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
419 scanEscapeCharacter();
420 escapeSize = currentPosition - escapeSize;
421 if (withoutUnicodePtr == 0) {
422 //buffer all the entries that have been left aside....
423 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
427 withoutUnicodeBuffer,
430 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
431 } else { //overwrite the / in the buffer
432 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
433 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
438 // consume next character
439 unicodeAsBackSlash = false;
440 if (((currentCharacter = source[currentPosition++]) == '\\')
441 && (source[currentPosition] == 'u')) {
442 getNextUnicodeChar();
444 if (withoutUnicodePtr != 0) {
445 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
450 } catch (IndexOutOfBoundsException e) {
451 throw new InvalidInputException(UNTERMINATED_STRING);
452 } catch (InvalidInputException e) {
453 if (e.getMessage().equals(INVALID_ESCAPE)) {
454 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
455 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
456 if (currentPosition + lookAhead == source.length)
458 if (source[currentPosition + lookAhead] == '\n')
460 if (source[currentPosition + lookAhead] == '\"') {
461 currentPosition += lookAhead + 1;
// cursor falls within the scanned literal (startPosition .. currentPosition - 1): refuse completion
469 if (startPosition <= cursorLocation && cursorLocation <= currentPosition-1){
470 throw new InvalidCursorLocation(InvalidCursorLocation.NO_COMPLETION_INSIDE_STRING);
472 return TokenNameStringLiteral;
476 if ((test = getNextChar('/', '*')) == 0) { //line comment
477 try { //get the next char
478 if (((currentCharacter = source[currentPosition++]) == '\\')
479 && (source[currentPosition] == 'u')) {
480 //-------------unicode handling ------------
481 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
483 while (source[currentPosition] == 'u') {
486 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
488 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
490 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
492 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
494 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
496 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500 //handle the \\u case manually into comment
501 if (currentCharacter == '\\') {
502 if (source[currentPosition] == '\\')
505 while (currentCharacter != '\r' && currentCharacter != '\n') {
507 if (((currentCharacter = source[currentPosition++]) == '\\')
508 && (source[currentPosition] == 'u')) {
509 //-------------unicode handling ------------
510 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
512 while (source[currentPosition] == 'u') {
515 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
517 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
519 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
521 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
523 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
525 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
528 //handle the \\u case manually into comment
529 if (currentCharacter == '\\') {
530 if (source[currentPosition] == '\\')
534 recordComment(false);
// cursor falls inside the line comment just scanned: refuse completion
535 if (startPosition <= cursorLocation && cursorLocation < currentPosition-1){
536 throw new InvalidCursorLocation(InvalidCursorLocation.NO_COMPLETION_INSIDE_COMMENT);
538 if (recordLineSeparator
539 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
541 if (tokenizeComments) {
542 currentPosition--; // reset one character behind
543 return TokenNameCOMMENT_LINE;
545 } catch (IndexOutOfBoundsException e) { //an eof will then be generated
546 if (tokenizeComments) {
547 currentPosition--; // reset one character behind
548 return TokenNameCOMMENT_LINE;
553 if (test > 0) { //traditional and annotation comment
554 boolean isJavadoc = false, star = false;
555 // consume next character
556 unicodeAsBackSlash = false;
557 if (((currentCharacter = source[currentPosition++]) == '\\')
558 && (source[currentPosition] == 'u')) {
559 getNextUnicodeChar();
561 if (withoutUnicodePtr != 0) {
562 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
566 if (currentCharacter == '*') {
570 if (recordLineSeparator
571 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
573 try { //get the next char
574 if (((currentCharacter = source[currentPosition++]) == '\\')
575 && (source[currentPosition] == 'u')) {
576 //-------------unicode handling ------------
577 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
579 while (source[currentPosition] == 'u') {
582 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
584 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
586 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
588 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
590 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
592 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
595 //handle the \\u case manually into comment
596 if (currentCharacter == '\\') {
597 if (source[currentPosition] == '\\')
600 // empty comment is not a javadoc /**/
601 if (currentCharacter == '/') {
604 //loop until end of comment */
605 while ((currentCharacter != '/') || (!star)) {
606 if (recordLineSeparator
607 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
609 star = currentCharacter == '*';
611 if (((currentCharacter = source[currentPosition++]) == '\\')
612 && (source[currentPosition] == 'u')) {
613 //-------------unicode handling ------------
614 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
616 while (source[currentPosition] == 'u') {
619 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
621 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
623 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
625 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
627 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
629 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
632 //handle the \\u case manually into comment
633 if (currentCharacter == '\\') {
634 if (source[currentPosition] == '\\')
638 recordComment(isJavadoc);
// cursor falls inside the block comment just scanned: refuse completion
639 if (startPosition <= cursorLocation && cursorLocation < currentPosition-1){
640 throw new InvalidCursorLocation(InvalidCursorLocation.NO_COMPLETION_INSIDE_COMMENT);
642 if (tokenizeComments) {
644 return TokenNameCOMMENT_JAVADOC;
645 return TokenNameCOMMENT_BLOCK;
647 } catch (IndexOutOfBoundsException e) {
648 throw new InvalidInputException(UNTERMINATED_COMMENT);
652 if (getNextChar('='))
653 return TokenNameDIVIDE_EQUAL;
654 return TokenNameDIVIDE;
659 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
660 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
663 if (Character.isJavaIdentifierStart(currentCharacter))
664 return scanIdentifierOrKeyword();
665 if (Character.isDigit(currentCharacter))
666 return scanNumber(false);
667 return TokenNameERROR;
670 } //-----------------end switch while try--------------------
671 catch (IndexOutOfBoundsException e) {
673 /* might be completing at very end of file (e.g. behind a dot) */
// same empty-identifier trick as in the eofPosition check above, applied when reading ran off the source
674 if (completionIdentifier == null &&
675 startPosition == cursorLocation + 1){
676 currentPosition = startPosition; // for being detected as empty free identifier
677 return TokenNameIdentifier;
682 * In case we actually read a keyword, but the cursor is located inside,
683 * we pretend we read an identifier.
// Scans an identifier or keyword through the superclass. When the result is not
// TokenNameIdentifier but the token contains or abuts the cursor, TokenNameIdentifier is reported
// instead so that the keyword text can be completed like any identifier.
// NOTE(review): the fall-through return is not visible in this view - presumably it returns id.
685 public int scanIdentifierOrKeyword() throws InvalidInputException {
687 int id = super.scanIdentifierOrKeyword();
689 // convert completed keyword into an identifier
// cursor is at most one position before the token start and strictly before its end
690 if (id != TokenNameIdentifier
691 && startPosition <= cursorLocation+1
692 && cursorLocation < currentPosition){
693 return TokenNameIdentifier;
// Scans a number through the superclass, then refuses completion when the cursor lies within the
// scanned number (from startPosition up to, but excluding, currentPosition).
// Throws InvalidCursorLocation in that case.
// NOTE(review): the end of this method is not visible in this view - presumably it returns token.
697 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
699 int token = super.scanNumber(dotPrefix);
701 // consider completion just before a number to be ok, will insert before it
// a cursor at startPosition - 1 (just before the number) does not match and is therefore accepted
702 if (startPosition <= cursorLocation && cursorLocation < currentPosition){
703 throw new InvalidCursorLocation(InvalidCursorLocation.NO_COMPLETION_INSIDE_NUMBER);