/*******************************************************************************
 * Copyright (c) 2000, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package net.sourceforge.phpdt.internal.ui.text;

import java.util.EnumMap;
import java.util.Map;

import net.sourceforge.phpeclipse.ui.text.rules.AbstractPartitioner;

//incastrix
//import org.eclipse.jface.text.Assert;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;

/**
 * Hand-written partition scanner for PHP source. It recognizes single line
 * comments ({@code //} and {@code #}), multi line comments, PHPDoc comments,
 * single and double quoted strings and heredoc strings. Everything else is
 * reported with a token whose data is {@code null} (the default PHP
 * partition).
 * <p>
 * Limitations visible in the code: a heredoc start ID is everything between
 * {@code <<<} and the next blank or line break (quotes are not treated
 * specially), and a heredoc is only closed by a line that starts with the ID
 * immediately followed by {@code ';'}.
 */
public class FastJavaPartitionScanner implements IPartitionTokenScanner, IPHPPartitions {

    /** The kind of partition the scanner is currently inside. */
    private enum PartState {
        PHP,
        SINGLE_LINE_COMMENT,
        MULTI_LINE_COMMENT,
        PHPDOC,
        STRING_DQ,
        STRING_SQ,
        STRING_HEREDOC
    }

    /** The last significant character sequence that has been read. */
    private enum ScanState {
        NONE,
        BACKSLASH,       // postfix for STRING_DQ and STRING_SQ
        SLASH,           // prefix for SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT or PHPDOC
        SLASH_STAR,      // prefix for MULTI_LINE_COMMENT or PHPDOC
        SLASH_STAR_STAR, // prefix for MULTI_LINE_COMMENT or PHPDOC
        STAR,            // postfix for MULTI_LINE_COMMENT or PHPDOC
        CARRIAGE_RETURN, // a line break was read ('\r', or '\n' inside a heredoc)
        LESS,            // found a '<'
        LESS_LESS,       // found a '<<'
        LESS_LESS_LESS,  // found a '<<<'
        HEREDOC_ID,      // found a '<<<' and scanning the ID (up to the end of the ID, which is a ' ')
        HEREDOC,         // found a '<<<' and the ID
        HEREDOC_ID_END   // scanning a possible heredoc end ID
    }

    /** The heredoc start ID read after {@code <<<}; empty until one has been scanned. */
    private final StringBuilder fHeredocId = new StringBuilder();

    /**
     * The possible heredoc end ID, collected from the start of a line. It is
     * compared with {@link #fHeredocId} when a {@code ';'} is read.
     */
    private final StringBuilder fHeredocIdEnd = new StringBuilder();

    /** The scanner. */
    private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(1000); // faster implementation

    /** The offset of the last returned token. */
    private int fTokenOffset;

    /** The length of the last returned token. */
    private int fTokenLength;

    /** The state of the scanner. */
    private PartState fState;

    /** The last significant characters read. */
    private ScanState fLast;

    /** The amount of characters already read on first call to nextToken(). */
    private int fPrefixLength;

    /** Whether JavaPartitionScanner behavior is emulated. */
    private final boolean fEmulate;

    /** Emulation only: offset of the pending default (PHP) run, or -1 if there is none. */
    private int fJavaOffset;

    /** Emulation only: length of the pending default (PHP) run. */
    private int fJavaLength;

    /** One token per partition kind; the PHP token carries {@code null} data. */
    private final Map<PartState, IToken> fTokens = createTokens();

    /**
     * Creates a scanner.
     *
     * @param emulate {@code true} to emulate JavaPartitionScanner
     */
    public FastJavaPartitionScanner(boolean emulate) {
        fEmulate = emulate;
    }

    /** Creates a scanner that does not emulate JavaPartitionScanner. */
    public FastJavaPartitionScanner() {
        this(false);
    }

    /** Builds the partition-kind to token table without relying on enum ordinals. */
    private static Map<PartState, IToken> createTokens() {
        final Map<PartState, IToken> tokens = new EnumMap<PartState, IToken>(PartState.class);
        tokens.put(PartState.PHP, new Token(null));
        tokens.put(PartState.SINGLE_LINE_COMMENT, new Token(PHP_SINGLELINE_COMMENT));
        tokens.put(PartState.MULTI_LINE_COMMENT, new Token(PHP_MULTILINE_COMMENT));
        tokens.put(PartState.PHPDOC, new Token(PHP_PHPDOC_COMMENT));
        tokens.put(PartState.STRING_DQ, new Token(PHP_STRING_DQ));
        tokens.put(PartState.STRING_SQ, new Token(PHP_STRING_SQ));
        tokens.put(PartState.STRING_HEREDOC, new Token(PHP_STRING_HEREDOC));
        return tokens;
    }

    /**
     * Returns the next partition token. In emulation mode a default (PHP) run
     * that precedes a partition is handed out as successive one-character
     * tokens until the end of that run is reached.
     *
     * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
     */
    public IToken nextToken() {
        // emulate JavaPartitionScanner
        if (fEmulate) {
            if ((fJavaOffset != -1) && (fTokenOffset + fTokenLength != fJavaOffset + fJavaLength)) {
                fTokenOffset += fTokenLength;
                return fTokens.get(PartState.PHP);
            } else {
                fJavaOffset = -1;
                fJavaLength = 0;
            }
        }

        // The new token starts where the previous one ended and already
        // contains the characters that opened it (the prefix).
        fTokenOffset += fTokenLength;
        fTokenLength = fPrefixLength;

        while (true) {
            final int ch = fScanner.read();

            // Characters that are handled independently of the partition kind.
            switch (ch) {
            case ICharacterScanner.EOF:
                if (fTokenLength > 0) {
                    fLast = ScanState.NONE; // ignore last
                    return preFix(fState, PartState.PHP, ScanState.NONE, 0);
                } else {
                    fLast = ScanState.NONE;
                    fPrefixLength = 0;
                    return Token.EOF;
                }

            case '\r':
                // emulate JavaPartitionScanner
                if (!fEmulate && (fLast != ScanState.CARRIAGE_RETURN)) {
                    // Remember the carriage return and decide on the next character.
                    fLast = ScanState.CARRIAGE_RETURN;
                    fTokenLength++;
                    continue;
                } else {
                    switch (fState) {
                    case SINGLE_LINE_COMMENT:
                        if (fTokenLength > 0) {
                            final IToken token = fTokens.get(fState);
                            // emulate JavaPartitionScanner
                            if (fEmulate) {
                                fTokenLength++;
                                fLast = ScanState.NONE;
                                fPrefixLength = 0;
                            } else {
                                // this '\r' becomes the first character of the next token
                                fLast = ScanState.CARRIAGE_RETURN;
                                fPrefixLength = 1;
                            }
                            fState = PartState.PHP;
                            return token;
                        } else {
                            consume();
                            continue;
                        }
                    default:
                        consume();
                        continue;
                    }
                }

            case '\n':
                switch (fState) {
                case SINGLE_LINE_COMMENT:
                    // a line feed ends a single line comment
                    return postFix(fState);
                case STRING_HEREDOC:
                    // count the character and remember that a new line starts,
                    // so that the next characters are checked for the end ID
                    fTokenLength++;
                    fLast = ScanState.CARRIAGE_RETURN;
                    continue;
                default:
                    consume();
                    continue;
                }

            case '?':
                if (fState == PartState.SINGLE_LINE_COMMENT) {
                    final int nextch = fScanner.read();
                    if (nextch == '>') {
                        // "?>" ends a single line comment, e.g.
                        // <h1>This is an <?php # echo 'simple'; ?> example.</h1>
                        // Both characters are pushed back so that they are
                        // scanned again as part of the following token.
                        fTokenLength--; // compensates the increment done by postFix
                        fScanner.unread();
                        fScanner.unread();
                        return postFix(fState);
                    } else {
                        // bug #1404228 (crash): only push the look-ahead back
                        // if a character has actually been read
                        if (nextch != ICharacterScanner.EOF) {
                            fScanner.unread();
                        }
                    }
                }
                // fall through: '?' is otherwise an ordinary character

            default:
                // A single line comment was followed by a lone '\r': the comment
                // ends there and the current character opens the next token.
                // ('\r' itself never gets here, it is handled above.)
                if (!fEmulate && (fLast == ScanState.CARRIAGE_RETURN)
                        && (fState == PartState.SINGLE_LINE_COMMENT)) {
                    final ScanState last;
                    final PartState newState;
                    switch (ch) {
                    case '/':
                        last = ScanState.SLASH;
                        newState = PartState.PHP;
                        break;
                    case '*':
                        last = ScanState.STAR;
                        newState = PartState.PHP;
                        break;
                    case '\'':
                        last = ScanState.NONE;
                        newState = PartState.STRING_SQ;
                        break;
                    case '"':
                        last = ScanState.NONE;
                        newState = PartState.STRING_DQ;
                        break;
                    case '\\':
                        last = ScanState.BACKSLASH;
                        newState = PartState.PHP;
                        break;
                    default:
                        last = ScanState.NONE;
                        newState = PartState.PHP;
                        break;
                    }
                    fLast = ScanState.NONE; // ignore fLast
                    return preFix(fState, newState, last, 1);
                }
                break;
            }

            // Partition specific handling; a non-null result is a finished token.
            final IToken token;
            switch (fState) {
            case PHP:
                token = scanPhp(ch);
                break;
            case SINGLE_LINE_COMMENT:
                // inside a single line comment (started with // or #)
                consume();
                token = null;
                break;
            case PHPDOC:
                token = scanPhpDoc(ch);
                break;
            case MULTI_LINE_COMMENT:
                token = scanMultiLineComment(ch);
                break;
            case STRING_DQ:
                token = scanQuotedString(ch, '"', PartState.STRING_DQ);
                break;
            case STRING_SQ:
                token = scanQuotedString(ch, '\'', PartState.STRING_SQ);
                break;
            case STRING_HEREDOC:
                token = scanHeredoc(ch);
                break;
            default:
                token = null;
                break;
            }
            if (token != null) {
                return token;
            }
        }
    }

    /**
     * Handles one character of default PHP code and detects the start of
     * comments and strings.
     *
     * @param ch the character just read
     * @return the finished PHP token, or {@code null} to keep scanning
     */
    private IToken scanPhp(int ch) {
        switch (ch) {
        case '#': // start of a single line comment
            // fLast has to be cleared first: otherwise preFix() would cut a
            // pending '/', '<', '<<' or '\r' off the PHP token and every
            // following token offset would be shifted.
            fLast = ScanState.NONE;
            return startPartition(PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 1);

        case '<':
            if (fLast == ScanState.LESS) {
                fTokenLength++;
                fLast = ScanState.LESS_LESS;
            } else if (fLast == ScanState.LESS_LESS) {
                // third '<': this is the start of a heredoc string
                return startPartition(PartState.STRING_HEREDOC, ScanState.LESS_LESS_LESS, 3);
            } else {
                fTokenLength++;
                fLast = ScanState.LESS;
            }
            return null;

        case '/':
            if (fLast == ScanState.SLASH) {
                // second '/': this is the start of a single line comment
                return startPartition(PartState.SINGLE_LINE_COMMENT, ScanState.NONE, 2);
            }
            fTokenLength++;
            fLast = ScanState.SLASH; // might be the start of a comment
            return null;

        case '*':
            if (fLast == ScanState.SLASH) {
                // "/*": this is the start of a multi line (or PHPDoc) comment
                return startPartition(PartState.MULTI_LINE_COMMENT, ScanState.SLASH_STAR, 2);
            }
            consume(); // no slash before the '*', so it is a normal character
            return null;

        case '\'': // start of a single quoted string
            fLast = ScanState.NONE; // ignore fLast
            return startPartition(PartState.STRING_SQ, ScanState.NONE, 1);

        case '"': // start of a double quoted string
            fLast = ScanState.NONE; // ignore fLast
            return startPartition(PartState.STRING_DQ, ScanState.NONE, 1);

        default: // a normal character with no special meaning
            consume();
            return null;
        }
    }

    /**
     * Switches from PHP code to a new partition whose opening characters have
     * just been completed. If PHP text precedes the opening characters, that
     * text is returned as a token. Otherwise the new partition becomes the
     * current token right away and scanning goes on.
     *
     * @param newState the kind of partition that starts
     * @param last the scan state to continue with
     * @param prefixLength the number of characters that open the partition
     * @return the preceding PHP token, or {@code null} if there is none
     */
    private IToken startPartition(PartState newState, ScanState last, int prefixLength) {
        final boolean hasPrecedingText = fTokenLength - getLastLength(fLast) > 0;
        final IToken token = preFix(PartState.PHP, newState, last, prefixLength);
        if (hasPrecedingText) {
            return token;
        }
        fTokenOffset += fTokenLength;
        fTokenLength = fPrefixLength;
        return null;
    }

    /**
     * Handles one character inside a PHPDoc comment.
     *
     * @param ch the character just read
     * @return the finished comment token, or {@code null} to keep scanning
     */
    private IToken scanPhpDoc(int ch) {
        if (ch == '/') {
            if (fLast == ScanState.SLASH_STAR_STAR) {
                // the comment consists of the four characters /**/ only,
                // which is reported as a multi line comment
                return postFix(PartState.MULTI_LINE_COMMENT);
            }
            if (fLast == ScanState.STAR) {
                return postFix(PartState.PHPDOC); // found the closing */
            }
            consume();
        } else if (ch == '*') {
            fTokenLength++;
            fLast = ScanState.STAR; // might be the start of the closing */
        } else {
            consume();
        }
        return null;
    }

    /**
     * Handles one character inside a multi line comment. A '*' directly after
     * the opening slash-star turns the partition into a PHPDoc comment.
     *
     * @param ch the character just read
     * @return the finished comment token, or {@code null} to keep scanning
     */
    private IToken scanMultiLineComment(int ch) {
        if (ch == '*') {
            if (fLast == ScanState.SLASH_STAR) {
                fLast = ScanState.SLASH_STAR_STAR;
                fTokenLength++;
                fState = PartState.PHPDOC;
            } else {
                fTokenLength++;
                fLast = ScanState.STAR;
            }
        } else if (ch == '/' && fLast == ScanState.STAR) {
            return postFix(PartState.MULTI_LINE_COMMENT);
        } else {
            consume();
        }
        return null;
    }

    /**
     * Handles one character inside a single or double quoted string.
     *
     * @param ch the character just read
     * @param quote the quote character that closes the string
     * @param state the partition kind of the string
     * @return the finished string token, or {@code null} to keep scanning
     */
    private IToken scanQuotedString(int ch, char quote, PartState state) {
        if (ch == '\\') {
            // a backslash that follows a backslash does not escape anything
            fLast = (fLast == ScanState.BACKSLASH) ? ScanState.NONE : ScanState.BACKSLASH;
            fTokenLength++;
            return null;
        }
        if (ch == quote && fLast != ScanState.BACKSLASH) {
            return postFix(state);
        }
        consume();
        return null;
    }

    /**
     * Handles one character inside a heredoc string: first collects the start
     * ID, later collects a candidate end ID at the beginning of every line.
     *
     * @param ch the character just read
     * @return the finished heredoc token, or {@code null} to keep scanning
     */
    private IToken scanHeredoc(int ch) {
        switch (fLast) {
        case LESS_LESS_LESS: // first character after the '<<<'
            fLast = ScanState.HEREDOC_ID;
            fHeredocId.setLength(0);
            fHeredocId.append((char) ch);
            fTokenLength++;
            return null;

        case HEREDOC_ID: // scanning the start ID
            if (ch == ' ') {
                fLast = ScanState.HEREDOC;
            } else {
                fHeredocId.append((char) ch);
            }
            fTokenLength++;
            return null;

        case CARRIAGE_RETURN: // first character of a new line
            fTokenLength++;
            fHeredocIdEnd.setLength(0);
            fHeredocIdEnd.append((char) ch);
            fLast = ScanState.HEREDOC_ID_END;
            return null;

        case HEREDOC_ID_END: // scanning the (possible) end ID
            if (ch == ';') { // the end ID ends with a ';'
                // An empty start ID (scanning was resumed inside a heredoc)
                // can never match, because the candidate has at least one
                // character.
                if (fHeredocId.toString().contentEquals(fHeredocIdEnd)) {
                    return postFix(PartState.STRING_HEREDOC);
                }
                consume(); // wrong end ID, so just eat the character
            } else {
                fTokenLength++;
                fHeredocIdEnd.append((char) ch);
            }
            return null;

        default: // normally state NONE
            consume();
            return null;
        }
    }

    /**
     * Returns the number of characters that the given scan state stands for.
     *
     * @param last the scan state
     * @return the length, or -1 for scan states without a defined length
     */
    private static int getLastLength(ScanState last) {
        switch (last) {
        default:
            return -1;
        case NONE:
            return 0;
        case LESS:
        case CARRIAGE_RETURN:
        case BACKSLASH:
        case SLASH:
        case STAR:
            return 1;
        case LESS_LESS:
        case SLASH_STAR:
            return 2;
        case SLASH_STAR_STAR:
            return 3;
        case HEREDOC:
            return 3;
        }
    }

    /** Counts the current character and resets the scan state. */
    private void consume() {
        fTokenLength++;
        fLast = ScanState.NONE;
    }

    /**
     * Ends the current partition with the character just read and goes back
     * to PHP code.
     *
     * @param state the kind of partition that ends
     * @return the token for the finished partition
     */
    private IToken postFix(PartState state) {
        fTokenLength++;
        fLast = ScanState.NONE;
        fState = PartState.PHP; // the next partition is plain PHP
        fPrefixLength = 0;      // and has no prefix
        return fTokens.get(state);
    }

    /**
     * Ends the current partition in front of the characters that open the
     * next one. The characters represented by the current scan state are
     * removed from the length of the finished token.
     *
     * @param oldState the kind of partition that ends
     * @param newState the kind of partition that starts
     * @param last the scan state to continue with
     * @param prefixLength the number of characters that open the new partition
     * @return the token for the finished partition
     */
    private IToken preFix(PartState oldState, PartState newState, ScanState last, int prefixLength) {
        if (fEmulate
                && (oldState == PartState.PHP)
                && (fTokenLength - getLastLength(fLast) > 0)) {
            // emulate JavaPartitionScanner: remember the whole PHP run and
            // hand it out one character at a time (see nextToken)
            fTokenLength -= getLastLength(fLast);
            fJavaOffset = fTokenOffset;
            fJavaLength = fTokenLength;
            fTokenLength = 1;
            fState = newState;
            fPrefixLength = prefixLength;
            fLast = last;
            return fTokens.get(oldState);
        } else {
            fTokenLength -= getLastLength(fLast);
            fLast = last;
            fPrefixLength = prefixLength;
            fState = newState;
            return fTokens.get(oldState);
        }
    }

    /**
     * Maps a content type to the matching partition kind.
     *
     * @param contentType the content type, may be {@code null}
     * @return the partition kind; PHP for {@code null} or unknown content types
     */
    private static PartState getState(String contentType) {
        if (contentType == null) {
            return PartState.PHP;
        } else if (contentType.equals(PHP_SINGLELINE_COMMENT)) {
            return PartState.SINGLE_LINE_COMMENT;
        } else if (contentType.equals(PHP_MULTILINE_COMMENT)) {
            return PartState.MULTI_LINE_COMMENT;
        } else if (contentType.equals(PHP_PHPDOC_COMMENT)) {
            return PartState.PHPDOC;
        } else if (contentType.equals(PHP_STRING_DQ)) {
            return PartState.STRING_DQ;
        } else if (contentType.equals(PHP_STRING_SQ)) {
            return PartState.STRING_SQ;
        } else if (contentType.equals(PHP_STRING_HEREDOC)) {
            return PartState.STRING_HEREDOC;
        } else {
            return PartState.PHP;
        }
    }

    /**
     * Configures the scanner for a range that may start inside a partition.
     * <p>
     * Because of the PHP heredoc syntax a heredoc partition has to be parsed
     * from its beginning and not from anywhere in the middle: without reading
     * the heredoc start ID the matching end ID cannot be recognized. Therefore
     * scanning starts at {@code partitionOffset} whenever it is known (not
     * negative). If it is negative, scanning starts at {@code offset} in the
     * state given by {@code contentType}, and the first token starts at
     * {@code offset}.
     *
     * @see IPartitionTokenScanner#setPartialRange(IDocument, int, int, String, int)
     */
    public void setPartialRange(IDocument document, int offset, int length, String contentType,
            int partitionOffset) {
        if (partitionOffset >= 0) {
            // restart at the beginning of the partition
            fScanner.setRange(document, partitionOffset, length + (offset - partitionOffset));
            fTokenOffset = partitionOffset;
            fTokenLength = 0;
            fPrefixLength = 0;
            fLast = ScanState.NONE;
            fState = PartState.PHP;
        } else {
            // The partition start is unknown. A negative value must not be
            // used as token offset: it would yield a token with a negative
            // offset and would clash with the -1 marker of fJavaOffset.
            fScanner.setRange(document, offset, length);
            fTokenOffset = offset;
            fTokenLength = 0;
            fPrefixLength = 0;
            fLast = ScanState.NONE;
            fState = getState(contentType);
        }

        // emulate JavaPartitionScanner
        if (fEmulate) {
            fJavaOffset = -1;
            fJavaLength = 0;
        }
    }

    /**
     * @see ITokenScanner#setRange(IDocument, int, int)
     */
    public void setRange(IDocument document, int offset, int length) {
        fScanner.setRange(document, offset, length);
        fTokenOffset = offset;
        fTokenLength = 0;
        fPrefixLength = 0;
        fLast = ScanState.NONE;
        fState = PartState.PHP;

        // emulate JavaPartitionScanner
        if (fEmulate) {
            fJavaOffset = -1;
            fJavaLength = 0;
        }
    }

    /*
     * @see ITokenScanner#getTokenLength()
     */
    public int getTokenLength() {
        return fTokenLength;
    }

    /*
     * @see ITokenScanner#getTokenOffset()
     */
    public int getTokenOffset() {
        if (AbstractPartitioner.DEBUG) {
            Assert.isTrue(fTokenOffset >= 0, Integer.toString(fTokenOffset));
        }
        return fTokenOffset;
    }
}