1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
13 import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;
16 //import org.eclipse.jface.text.Assert;
17 import org.eclipse.core.runtime.Assert;
18 import org.eclipse.jface.text.IDocument;
19 import org.eclipse.jface.text.rules.ICharacterScanner;
20 import org.eclipse.jface.text.rules.IPartitionTokenScanner;
21 import org.eclipse.jface.text.rules.IToken;
22 import org.eclipse.jface.text.rules.Token;
25 * This scanner recognizes PHPDoc comments, PHP multi line comments, PHP
26 * single line comments and PHP strings (double quoted, single quoted and heredoc).
28 public class FastJavaPartitionScanner implements IPartitionTokenScanner,
33 private static enum PartState {
43 private static enum ScanState { // The last significant characters the scanner has read (kept in fLast)
45 BACKSLASH, // postfix for STRING_DQ and STRING_SQ
46 SLASH, // prefix for SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT or PHPDOC
47 SLASH_STAR, // prefix for MULTI_LINE_COMMENT or PHPDOC
48 SLASH_STAR_STAR, // prefix for MULTI_LINE_COMMENT or PHPDOC
49 STAR, // postfix for MULTI_LINE_COMMENT or PHPDOC
50 CARRIAGE_RETURN, // postfix for STRING_DQ, STRING_SQ and SINGLE_LINE_COMMENT; also marks a new line inside STRING_HEREDOC
52 LESS_LESS, // Found a '<<'
53 LESS_LESS_LESS, // Found a '<<<'
54 HEREDOC_ID, // Found a '<<<' and scanning the ID (till the end of the ID, which is a ' ')
55 HEREDOC, // Found a '<<<' and ID
56 HEREDOC_ID_END, // Searching the heredoc end ID
59 /** The heredoc ID string */
60 private String fHeredocId;
62 /** The possible heredoc ID string which is read right after a new line. Ends with a ';' and should
63 * match the heredoc ID string fHeredocId
65 private String fHeredocIdEnd;
68 private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner (1000); // faster implementation
70 /** The offset of the last returned token. */
71 private int fTokenOffset;
73 /** The length of the last returned token. */
74 private int fTokenLength;
76 /** The state of the scanner. */
77 private PartState fState;
79 /** The last significant characters read. */
80 private ScanState fLast;
82 /** The amount of characters already read on first call to nextToken(). */
83 private int fPrefixLength;
85 // emulate JavaPartitionScanner
86 private boolean fEmulate = false; // When true, preFix() and the '\r' handling in nextToken() take the emulation paths
88 private int fJavaOffset; // Offset of the PHP partition remembered by preFix() in emulation mode; nextToken() compares it against -1
90 private int fJavaLength; // Length of the PHP partition remembered by preFix() in emulation mode
92 private final IToken[] fTokens = new IToken[] { // One token per partition type, looked up with PartState.ordinal()
94 new Token (PHP_SINGLELINE_COMMENT),
95 new Token (PHP_MULTILINE_COMMENT),
96 new Token (PHP_PHPDOC_COMMENT),
97 new Token (PHP_STRING_DQ),
98 new Token (PHP_STRING_SQ),
99 new Token (PHP_STRING_HEREDOC)};
101 public FastJavaPartitionScanner(boolean emulate) {
105 public FastJavaPartitionScanner() {
110 * Emulate JavaPartitionScanner
112 * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
114 public IToken nextToken() { // Scans forward and returns the token of the partition that was just completed
116 if ((fJavaOffset != -1) && (fTokenOffset + fTokenLength != fJavaOffset + fJavaLength)) {
117 fTokenOffset += fTokenLength;
119 return fTokens[PartState.PHP.ordinal()];
127 fTokenOffset += fTokenLength; // The new token offset is the offset of the previous partition + length of previous partition
128 fTokenLength = fPrefixLength; // The new partition is at least the length of the start of the new partition
131 final int ch = fScanner.read(); // Read the next character of the scanned range
134 case ICharacterScanner.EOF: // No more characters to read
135 if (fTokenLength > 0) {
136 fLast = ScanState.NONE; // ignore last
137 return preFix (fState, PartState.PHP, ScanState.NONE, 0); // Return the partition scanned so far
140 fLast = ScanState.NONE;
145 case '\r': // Found a carriage return
146 // emulate JavaPartitionScanner
147 if (!fEmulate && (fLast != ScanState.CARRIAGE_RETURN)) {
148 fLast = ScanState.CARRIAGE_RETURN; // Set to what we currently found
149 fTokenLength++; // and count the partition length
151 continue; // Go for the next character to read
155 case SINGLE_LINE_COMMENT:
156 if (fTokenLength > 0) {
157 IToken token = fTokens[fState.ordinal()];
159 // emulate JavaPartitionScanner
162 fLast = ScanState.NONE;
166 fLast = ScanState.CARRIAGE_RETURN;
170 fState = PartState.PHP;
184 case '\n': // Found a line feed
186 case SINGLE_LINE_COMMENT: // If we are running within a single line comment,
187 return postFix (fState); // the line feed ends the comment partition
189 case STRING_HEREDOC: // If we are running within a heredoc string
190 fTokenLength++; // Count the character
191 fLast = ScanState.CARRIAGE_RETURN; // and state is still new line
194 default: // If running anywhere else than on a single line comment
195 consume(); // count the length of the current partition
200 if (fState == PartState.SINGLE_LINE_COMMENT) {
201 int nextch = fScanner.read(); // Look one character ahead
204 // <h1>This is an <?php # echo 'simple' ?> example.</h1>
209 return postFix (fState);
212 // bug #1404228: Crash on <?php // comment ?>
213 if (nextch != ICharacterScanner.EOF) {
220 if (!fEmulate && (fLast == ScanState.CARRIAGE_RETURN)) {
222 case SINGLE_LINE_COMMENT:
231 last = ScanState.SLASH;
232 newState = PartState.PHP;
236 last = ScanState.STAR;
237 newState = PartState.PHP;
241 last = ScanState.NONE;
242 newState = PartState.STRING_SQ;
246 last = ScanState.NONE;
247 newState = PartState.STRING_DQ;
251 last = ScanState.CARRIAGE_RETURN;
252 newState = PartState.PHP;
256 last = ScanState.BACKSLASH;
257 newState = PartState.PHP;
261 last = ScanState.NONE;
262 newState = PartState.PHP;
266 fLast = ScanState.NONE; // ignore fLast
267 return preFix (fState, newState, last, 1);
279 case '#': // Start of a single line comment
280 if (fTokenLength > 0) {
281 return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
284 preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);
285 fTokenOffset += fTokenLength;
286 fTokenLength = fPrefixLength;
291 if (fLast == ScanState.LESS) {
293 fLast = ScanState.LESS_LESS;
295 else if (fLast == ScanState.LESS_LESS) {
296 if (fTokenLength - getLastLength(fLast) > 0) { // this is the start of a heredoc string ('<<<')
297 return preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
300 preFix (PartState.PHP, PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
301 fTokenOffset += fTokenLength;
302 fTokenLength = fPrefixLength;
307 fLast = ScanState.LESS;
311 case '/': // Start of single line comment?
312 if (fLast == ScanState.SLASH) { // If previous character was already a slash,
313 if (fTokenLength - getLastLength(fLast) > 0) { // this is the start of a single line comment
314 return preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
317 preFix (PartState.PHP, PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
318 fTokenOffset += fTokenLength;
319 fTokenLength = fPrefixLength;
324 fLast = ScanState.SLASH; // We currently found a slash
329 if (fLast == ScanState.SLASH) { // If previous character was a slash
330 if (fTokenLength - getLastLength (fLast) > 0) { // this is the start of a comment /*
331 return preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
334 preFix (PartState.PHP, PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
335 fTokenOffset += fTokenLength;
336 fTokenLength = fPrefixLength;
339 else { // No slash before the '*', so it's a normal character
344 case '\'': // The start of a single quoted string
345 fLast = ScanState.NONE; // ignore fLast
347 if (fTokenLength > 0) {
348 return preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
351 preFix (PartState.PHP, PartState.STRING_SQ, ScanState.NONE, 1);
352 fTokenOffset += fTokenLength;
353 fTokenLength = fPrefixLength;
357 case '"': // The start of a double quoted string
358 fLast = ScanState.NONE; // ignore fLast
360 if (fTokenLength > 0) {
361 return preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
364 preFix (PartState.PHP, PartState.STRING_DQ, ScanState.NONE, 1);
365 fTokenOffset += fTokenLength;
366 fTokenLength = fPrefixLength;
370 default: // Just a normal character with no special meaning
376 case SINGLE_LINE_COMMENT: // We are just running within a single line comment (started with // or #)
380 case PHPDOC: // We are just running within a php doc comment
384 case SLASH_STAR_STAR:
385 return postFix (PartState.MULTI_LINE_COMMENT);
388 return postFix (PartState.PHPDOC); // Found the end of the php doc (multi line) comment
396 case '*': // Found a '*'
398 fLast = ScanState.STAR; // Remember that we found a '*'
407 case MULTI_LINE_COMMENT: // We are currently running through a (possible) multi line comment
409 case '*': // and we found a '*'
410 if (fLast == ScanState.SLASH_STAR) { // If the previous characters have been a /*
411 fLast = ScanState.SLASH_STAR_STAR;
413 fState = PartState.PHPDOC;
417 fLast = ScanState.STAR;
422 if (fLast == ScanState.STAR) {
423 return postFix (PartState.MULTI_LINE_COMMENT);
439 fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH; // A second backslash in a row cancels the first one
444 if (fLast != ScanState.BACKSLASH) { // Not preceded by an (unescaped) backslash
445 return postFix (PartState.STRING_DQ);
461 fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH; // A second backslash in a row cancels the first one
466 if (fLast != ScanState.BACKSLASH) { // Not preceded by an (unescaped) backslash
467 return postFix (PartState.STRING_SQ);
480 case STRING_HEREDOC: // We are just running within a heredoc string
482 case LESS_LESS_LESS: // The first time after we recognized the '<<<'
483 fLast = ScanState.HEREDOC_ID; // We do a scan of the heredoc id string
485 fHeredocId += (char) ch;
489 case HEREDOC_ID: // Scan the starting heredoc ID
491 fLast = ScanState.HEREDOC;
495 fHeredocId += (char) ch;
500 case CARRIAGE_RETURN: // We previously found a new line
503 fHeredocIdEnd += (char) ch; // Add the first character to the (possible) end ID
504 fLast = ScanState.HEREDOC_ID_END; // Go for scanning the (possible) end ID
507 case HEREDOC_ID_END: // We scan the (possible) end ID
508 if (ch == ';') { // End ID ends with a ';'
509 if (fHeredocId.compareTo (fHeredocIdEnd) == 0) { // If start ID and end ID match.
510 return postFix (PartState.STRING_HEREDOC); // It's the end of a heredoc partition
513 consume (); // Wrong end ID, so just eat the character
518 fHeredocIdEnd += (char) ch; // Add the character to the possible heredoc end ID
522 default: // Normally state NONE
523 consume (); // Eat the character
527 } // end of switch (fState)
531 private static final int getLastLength (ScanState last) {
540 case CARRIAGE_RETURN:
550 case SLASH_STAR_STAR:
558 private final void consume() { // Accept the current character as ordinary content of the current partition
559 fTokenLength++; // Count the character
560 fLast = ScanState.NONE; // Reset scanner state to nothing special
564 * If we found the end of a partition, return the type of the partition which is currently finished
566 * @param state The type of partition we found the end for
569 private final IToken postFix (PartState state) { // Finish the partition of type 'state' and fall back to plain PHP
571 fLast = ScanState.NONE; // Reset the scanner state
572 fState = PartState.PHP; // The type of the next partition is just PHP
573 fPrefixLength = 0; // and the next partition starts without a prefix
575 return fTokens[state.ordinal()]; // Return the type of partition for which we found the end
579 * If we find the prefix of a new partition, return the type of the previous partition
584 * @param prefixLength
587 private final IToken preFix (PartState oldState, PartState newState, ScanState last, int prefixLength) {
588 if (fEmulate && // If we are in emulation run
589 (oldState == PartState.PHP) && // and the partition that ends here is plain PHP
590 (fTokenLength - getLastLength (fLast) > 0)) { // and it is not empty once the characters counted for fLast are taken off (presumably getLastLength is the length of that character sequence - confirm)
592 fTokenLength -= getLastLength (fLast); // Cut the characters counted for fLast off the finished partition
593 fJavaOffset = fTokenOffset; // Remember offset
594 fJavaLength = fTokenLength; // and length of the finished partition; nextToken() compares against these
597 fPrefixLength = prefixLength; // Remember the length of the currently found start of new partition
600 return fTokens[oldState.ordinal()]; // Return the type of the old partition
603 fTokenLength -= getLastLength (fLast); // Set the length of the last token (partition)
604 fLast = last; // Remember the scan state the new partition starts with
605 fPrefixLength = prefixLength; // Remember the length of the currently found start of new partition
606 fState = newState; // The type of the new partition we found
608 IToken token = fTokens[oldState.ordinal()]; // Return the type of the old partition
614 private static PartState getState (String contentType) { // Map a partition content type to the matching scanner state
615 if (contentType == null) // No content type given:
616 return PartState.PHP; // treat it as plain PHP
618 else if (contentType.equals (PHP_SINGLELINE_COMMENT)) // Single line comment partition
619 return PartState.SINGLE_LINE_COMMENT;
621 else if (contentType.equals (PHP_MULTILINE_COMMENT)) // Multi line comment partition
622 return PartState.MULTI_LINE_COMMENT;
624 else if (contentType.equals (PHP_PHPDOC_COMMENT)) // PHPDoc comment partition
625 return PartState.PHPDOC;
627 else if (contentType.equals (PHP_STRING_DQ)) // Double quoted string partition
628 return PartState.STRING_DQ;
630 else if (contentType.equals (PHP_STRING_SQ)) // Single quoted string partition
631 return PartState.STRING_SQ;
633 else if (contentType.equals (PHP_STRING_HEREDOC)) // Heredoc string partition
634 return PartState.STRING_HEREDOC;
637 return PartState.PHP; // Any other content type falls back to plain PHP
641 * @see IPartitionTokenScanner#setPartialRange (IDocument, int, int, String, int)
643 * @note Because of the PHP heredoc syntax we need to parse from the beginning of a heredoc partition,
644 * and not from anywhere in the middle. When not reading the start of the heredoc (and the correct heredoc start ID),
645 * we can't recognize the correct heredoc end ID. So we start, if possible, from the partitionOffset.
648 public void setPartialRange (IDocument document, int offset, int length, String contentType, int partitionOffset) {
649 if (partitionOffset >= 0) { // The start of the enclosing partition is known
650 fScanner.setRange (document, partitionOffset, length + (offset - partitionOffset)); // Scan from the partition start up to the end of the requested range
652 fTokenOffset = partitionOffset; // The first token starts at the partition start
655 fLast = ScanState.NONE;
656 fState = PartState.PHP; // restart at beginning of partition
659 fScanner.setRange (document, offset, length); // Scan only the requested range
661 fTokenOffset = partitionOffset; // NOTE(review): if this is the branch for partitionOffset < 0, fTokenOffset becomes negative here, while getTokenOffset() asserts fTokenOffset >= 0 in DEBUG mode - confirm
663 fPrefixLength = offset - partitionOffset; // Characters between the partition start and the scan start count as already read
664 fLast = ScanState.NONE;
666 if (offset == partitionOffset) {
667 fState = PartState.PHP; // restart at beginning of partition
670 fState = getState(contentType); // Continue inside the partition type given by the caller
673 // emulate JavaPartitionScanner
681 * @see ITokenScanner#setRange(IDocument, int, int)
683 public void setRange (IDocument document, int offset, int length) { // Prepare a scan of the given document range
684 fScanner.setRange (document, offset, length); // Hand the range to the buffered character scanner
686 fTokenOffset = offset; // The first token starts at the beginning of the range
689 fLast = ScanState.NONE; // Nothing significant read yet
690 fState = PartState.PHP; // Scanning starts in plain PHP
692 // emulate JavaPartitionScanner
700 * @see ITokenScanner#getTokenLength()
702 public int getTokenLength() {
707 * @see ITokenScanner#getTokenOffset()
709 public int getTokenOffset() {
710 if (AbstractPartitioner.DEBUG) {
711 Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));