1 /**********************************************************************
2 Copyright (c) 2000, 2002 IBM Corp. and others.
3 All rights reserved. This program and the accompanying materials
4 are made available under the terms of the Common Public License v1.0
5 which accompanies this distribution, and is available at
6 http://www.eclipse.org/legal/cpl-v10.html
9 IBM Corporation - Initial implementation
10 Klaus Hartlage - www.eclipseproject.de
11 **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
14 import java.io.CharArrayWriter;
15 import java.util.ArrayList;
16 import java.util.List;
18 import org.eclipse.jface.text.rules.ICharacterScanner;
19 import org.eclipse.jface.text.rules.IPredicateRule;
20 import org.eclipse.jface.text.rules.IToken;
21 import org.eclipse.jface.text.rules.IWordDetector;
22 import org.eclipse.jface.text.rules.MultiLineRule;
23 import org.eclipse.jface.text.rules.RuleBasedPartitionScanner;
24 import org.eclipse.jface.text.rules.Token;
25 import org.eclipse.jface.text.rules.WordRule;
28 * This scanner recognizes HTML multi line comments and PHP code sections.
// Partition scanner built on RuleBasedPartitionScanner. The content type
// names and the shared tokens used by its rules are declared below.
30 public class PHPPartitionScanner extends RuleBasedPartitionScanner {
// NOTE(review): not referenced by any code visible in this file -- confirm before removing.
32 private final static String SKIP = "__skip"; //$NON-NLS-1$
// Content type name carried by the comment token.
33 public final static String HTML_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$
34 // public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
// Content type name carried by the php token.
35 public final static String PHP = "__php";
36 // public final static String HTML = "__html";
// Token passed to every PHPMultiLineRule registered in the constructor.
38 public final static IToken php = new Token(PHP);
39 // public final static IToken html = new Token(HTML);
// Token passed to the HTML comment rule registered in the constructor.
40 public final static IToken comment = new Token(HTML_MULTILINE_COMMENT);
// The PHP opening tags as character arrays. Despite the EndSequence names,
// HTMLPatternRule.endSequenceDetected hands them to sequenceDetected as the
// sequences to search for.
42 protected final static char[] php0EndSequence = { '<', '?' };
43 protected final static char[] php1EndSequence = { '<', '?', 'p', 'h', 'p' };
44 protected final static char[] php2EndSequence = { '<', '?', 'P', 'H', 'P' };
// NOTE(review): no use of this field is visible in the file -- confirm before removing.
45 private StringBuffer test;
// Multi line rule used for the PHP sections. It overrides the end sequence
// search and keeps track of PHP comments and double quoted strings while
// scanning.
47 public class PHPMultiLineRule extends MultiLineRule {
// Both constructors only forward their arguments to MultiLineRule.
49 public PHPMultiLineRule(String startSequence, String endSequence, IToken token) {
50 super(startSequence, endSequence, token);
53 public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
54 super(startSequence, endSequence, token, escapeCharacter);
// Reads characters until EOF while tracking three states: line comment,
// multi line comment and string.
// NOTE(review): several branch bodies are not shown in this listing, so the
// notes below describe the visible conditions only -- confirm against the
// complete method.
57 protected boolean endSequenceDetected(ICharacterScanner scanner) {
61 boolean lineCommentMode = false;
62 boolean multiLineCommentMode = false;
63 boolean stringMode = false;
65 char[][] delimiters = scanner.getLegalLineDelimiters();
66 while ((c = scanner.read()) != ICharacterScanner.EOF) {
// A line feed is the only character tested for leaving line comment mode.
67 if (lineCommentMode && (c == '\n')) {
68 lineCommentMode = false;
69 // read until end of line
// A hash outside a string opens a line comment; multiLineCommentMode is not
// checked in this condition.
70 } else if ((!stringMode) && (c == '#')) {
71 // read until end of line
72 lineCommentMode = true;
// A slash outside strings and multi line comments may open a comment. c2 is
// presumably the character following the slash; its read is not shown.
74 } else if ((!stringMode) && (!multiLineCommentMode) && (c == '/')) {
77 lineCommentMode = true;
79 } else if (c2 == '*') {
80 multiLineCommentMode = true;
// A star while in multi line comment mode may close the comment.
85 } else if (c == '*' && multiLineCommentMode) {
88 multiLineCommentMode = false;
// Backslash inside a string; the handling itself is not shown.
93 } else if (c == '\\' && stringMode) {
// A double quote outside comments; presumably toggles stringMode -- TODO confirm.
100 } else if ((!lineCommentMode) && (!multiLineCommentMode) && (c == '"')) {
// While any of the three modes is active the end sequence checks below are
// presumably skipped; the body of this if is not shown -- TODO confirm.
108 if (lineCommentMode || multiLineCommentMode || stringMode) {
// Escape character, end sequence and optional end of line checks.
112 if (c == fEscapeCharacter) {
113 // Skip the escaped character.
115 } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
116 // Check if the specified end sequence has been found.
117 if (sequenceDetected(scanner, fEndSequence, true))
119 } else if (fBreaksOnEOL) {
120 // Check for end of line since it can be used to terminate the pattern.
121 for (int i = 0; i < delimiters.length; i++) {
122 if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false))
132 // public class HTMLMultiLineRule extends MultiLineRule {
134 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token) {
135 // super(startSequence, endSequence, token);
138 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
139 // super(startSequence, endSequence, token, escapeCharacter);
142 // protected boolean endSequenceDetected(ICharacterScanner scanner) {
145 // char[][] delimiters = scanner.getLegalLineDelimiters();
146 // while ((c = scanner.read()) != ICharacterScanner.EOF) {
148 // // scanner.unread();
149 // if (sequenceDetected(scanner, php2EndSequence, true)) {
158 // if (sequenceDetected(scanner, php1EndSequence, true)) {
167 // if (sequenceDetected(scanner, php0EndSequence, true)) {
173 // // scanner.read();
181 // protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
185 // if (endSequenceDetected(scanner))
190 // int c = scanner.read();
191 // // if (c == fStartSequence[0]) {
192 // // if (sequenceDetected(scanner, fStartSequence, false)) {
193 // if (endSequenceDetected(scanner))
200 // return Token.UNDEFINED;
203 // public IToken evaluate(ICharacterScanner scanner, boolean resume) {
204 // if (fColumn == UNDEFINED)
205 // return doEvaluate(scanner, resume);
207 // int c = scanner.read();
209 // // if (c == fStartSequence[0])
210 // return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
212 // // return Token.UNDEFINED;
// Predicate rule whose end sequence search looks for the PHP opening tags
// declared in the enclosing class. The only place that would register it in
// the scanner constructor is commented out.
216 public class HTMLPatternRule implements IPredicateRule {
// Marker value meaning that no column constraint is set.
218 protected static final int UNDEFINED = -1;
220 /** The token to be returned on success */
221 protected IToken fToken;
223 /** The pattern's column constraint */
224 protected int fColumn = UNDEFINED;
225 /** The pattern's escape character */
226 protected char fEscapeCharacter;
227 /** Indicates whether end of line terminates the pattern */
228 protected boolean fBreaksOnEOL;
231 * Creates a rule which returns the specified token on success.
232 * No start sequence, end sequence or escape character is passed in:
233 * the escape character is set to <code>(char)0</code> and the end of
234 * the line does not terminate the pattern.
236 * @param token the token which will be returned on success
// Sets the escape character to (char)0 and switches end of line termination off.
// NOTE(review): the assignment of the token parameter to fToken is presumably
// on a line not shown in this listing -- confirm.
242 public HTMLPatternRule(IToken token) {
244 fEscapeCharacter = (char)0;
245 fBreaksOnEOL = false;
249 * Sets a column constraint for this rule. If set, the rule's token
250 * will only be returned if the pattern is detected starting at the
251 * specified column. If the column is smaller than 0, the column
252 * constraint is considered removed.
254 * @param column the column in which the pattern starts
// Setter for the column constraint that evaluate(ICharacterScanner, boolean)
// compares against the scanner column.
// NOTE(review): the body is not shown in this listing; presumably it stores
// the value in fColumn -- confirm.
256 public void setColumnConstraint(int column) {
263 * Evaluates this rule without considering any column constraints.
265 * @param scanner the character scanner to be used
266 * @return the token resulting from this evaluation
// Convenience overload: delegates to doEvaluate(scanner, false), i.e. a
// fresh evaluation rather than a resumed one.
268 protected IToken doEvaluate(ICharacterScanner scanner) {
269 return doEvaluate(scanner, false);
273 * Evaluates this rule without considering any column constraints. Resumes
274 * detection, i.e. looks only for the end sequence required by this rule if the
275 * <code>resume</code> flag is set.
277 * @param scanner the character scanner to be used
278 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
279 * @return the token resulting from this evaluation
// Calls endSequenceDetected in two places and falls back to Token.UNDEFINED.
// NOTE(review): the conditions around the two calls are not shown here;
// presumably the first call is the resume path and the second the normal
// path -- confirm.
282 protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
286 if (endSequenceDetected(scanner))
// The start sequence checks are commented out, so one character is read and
// the end sequence search begins directly after it.
291 int c = scanner.read();
292 // if (c == fStartSequence[0]) {
293 // if (sequenceDetected(scanner, fStartSequence, false)) {
294 if (endSequenceDetected(scanner))
301 return Token.UNDEFINED;
305 * @see IRule#evaluate
// Single argument form: delegates to evaluate(scanner, false).
307 public IToken evaluate(ICharacterScanner scanner) {
308 return evaluate(scanner, false);
312 * Returns whether the end sequence was detected. As the pattern can be considered
313 * ended by a line delimiter, the result of this method is <code>true</code> if the
314 * rule breaks on the end of the line, or if the EOF character is read.
316 * @param scanner the character scanner to be used
317 * @return <code>true</code> if the end sequence has been detected
// Reads characters until EOF and tests for the PHP opening tags held in the
// enclosing class: the two five character spellings first, then the bare
// two character tag.
// NOTE(review): the conditions guarding each test and the bodies that follow
// a match are not shown in this listing -- confirm against the complete method.
319 protected boolean endSequenceDetected(ICharacterScanner scanner) {
322 char[][] delimiters = scanner.getLegalLineDelimiters();
323 while ((c = scanner.read()) != ICharacterScanner.EOF) {
326 if (sequenceDetected(scanner, php2EndSequence, true)) {
335 if (sequenceDetected(scanner, php1EndSequence, true)) {
344 if (sequenceDetected(scanner, php0EndSequence, true)) {
359 * Returns whether the next characters to be read by the character scanner
360 * are an exact match with the given sequence. No escape characters are allowed
361 * within the sequence. If specified the sequence is considered to be found
362 * when reading the EOF character.
364 * @param scanner the character scanner to be used
365 * @param sequence the sequence to be detected
366 * @param eofAllowed indicates whether EOF terminates the pattern
367 * @return <code>true</code> if the given sequence has been detected
// Compares the characters read from the scanner with sequence, starting at
// index 1; sequence[0] is presumably matched by the caller before this
// method is invoked -- TODO confirm.
369 protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
370 for (int i = 1; i < sequence.length; i++) {
371 int c = scanner.read();
// EOF is tested before the mismatch check and only matters when eofAllowed is set.
372 if (c == ICharacterScanner.EOF && eofAllowed) {
374 } else if (c != sequence[i]) {
375 // Non-matching character detected, rewind the scanner back to the start.
// Counts down from i - 1 to 1; the statement executed per step is not shown
// in this listing.
377 for (int j = i - 1; j > 0; j--)
387 * @see IPredicateRule#evaluate(ICharacterScanner, boolean)
// Without a column constraint the rule is evaluated directly. With one, a
// character is read and the rule is evaluated only when the scanner column
// equals fColumn; otherwise Token.UNDEFINED is returned.
390 public IToken evaluate(ICharacterScanner scanner, boolean resume) {
391 if (fColumn == UNDEFINED)
392 return doEvaluate(scanner, resume);
// NOTE(review): a line between this read and the return is not shown in this
// listing; presumably it puts the character back -- confirm.
394 int c = scanner.read();
396 // if (c == fStartSequence[0])
397 return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
399 // return Token.UNDEFINED;
403 * @see IPredicateRule#getSuccessToken()
// Accessor for the success token of this rule.
// NOTE(review): the body is not shown in this listing; presumably it returns
// fToken -- confirm.
406 public IToken getSuccessToken() {
411 * Detector for empty comments.
// Word detector handed to WordRule by WordPredicateRule.
413 static class EmptyCommentDetector implements IWordDetector {
416 * Method declared on IWordDetector
// NOTE(review): the body of isWordStart is not shown in this listing.
418 public boolean isWordStart(char c) {
423 * Method declared on IWordDetector
// A word part is either a star or a slash.
425 public boolean isWordPart(char c) {
426 return (c == '*' || c == '/');
// Word rule exposed as a predicate rule. It knows a single word, the empty
// comment, and maps it to the success token. The only place that would
// register it in the scanner constructor is commented out.
433 static class WordPredicateRule extends WordRule implements IPredicateRule {
435 private IToken fSuccessToken;
// Installs EmptyCommentDetector as the word detector and registers the word.
437 public WordPredicateRule(IToken successToken) {
438 super(new EmptyCommentDetector());
439 fSuccessToken = successToken;
440 addWord("/**/", fSuccessToken);
444 * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
// The resume flag is ignored; evaluation always goes through WordRule.evaluate.
446 public IToken evaluate(ICharacterScanner scanner, boolean resume) {
447 return super.evaluate(scanner);
451 * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
// Returns the token given to the constructor.
453 public IToken getSuccessToken() {
454 return fSuccessToken;
459 * Creates the partitioner and sets up the appropriate rules.
// Builds the predicate rule set: one MultiLineRule for HTML comments (comment
// token) and six PHPMultiLineRule instances (php token), then installs the
// rules with setPredicateRules. Every other rule below is commented out.
461 public PHPPartitionScanner() {
464 // IToken php = new Token(PHP);
465 // IToken html = new Token(HTML);
466 // IToken comment = new Token(HTML_MULTILINE_COMMENT);
468 List rules = new ArrayList();
470 // Add rule for single line comments.
471 // rules.add(new EndOfLineRule("//", Token.UNDEFINED));
473 // Add rule for strings and character constants.
474 // rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
475 // rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\'));
477 // Add special case word rule.
478 // rules.add(new WordPredicateRule(comment));
480 // Add rules for multi-line comments and javadoc.
481 //rules.add(new MultiLineRule("/**", "*/", javaDoc));
482 // rules.add(new HTMLMultiLineRule("<", "<?", html));
484 rules.add(new MultiLineRule("<!--", "-->", comment));
// The short open tag is registered only when followed by CR, LF, tab or
// space. The long tag is registered in two spellings only, all lower case
// and all upper case. Every PHP rule ends at the closing tag.
485 rules.add(new PHPMultiLineRule("<?\r", "?>", php));
486 rules.add(new PHPMultiLineRule("<?\n", "?>", php));
487 rules.add(new PHPMultiLineRule("<?\t", "?>", php));
488 rules.add(new PHPMultiLineRule("<? ", "?>", php));
489 rules.add(new PHPMultiLineRule("<?php", "?>", php));
490 rules.add(new PHPMultiLineRule("<?PHP", "?>", php));
492 // rules.add(new HTMLPatternRule(html)); // "<", "<?",
493 //Add rule for processing instructions
// Copy the collected rules into an IPredicateRule array and hand it to the scanner.
495 IPredicateRule[] result = new IPredicateRule[rules.size()];
496 rules.toArray(result);
497 setPredicateRules(result);
498 // setDefaultReturnToken(html);
501 // public IToken nextToken() {
503 // if (fContentType == null || fRules == null)
504 // return getNextToken();
506 // fTokenOffset= fOffset;
507 // fColumn= UNDEFINED;
508 // boolean resume= (fPartitionOffset < fOffset);
510 // IPredicateRule rule;
513 // for (int i= 0; i < fRules.length; i++) {
514 // rule= (IPredicateRule) fRules[i];
515 // token= rule.getSuccessToken();
516 // if (fContentType.equals(token.getData())) {
518 // fTokenOffset= fPartitionOffset;
519 // token= rule.evaluate(this, resume);
520 // if (!token.isUndefined()) {
521 // fContentType= null;
527 // fContentType= null;
528 // return getNextToken();
531 // public IToken getNextToken() {
537 // fTokenOffset= fOffset;
538 // fColumn= UNDEFINED;
540 // if (fRules != null) {
541 // for (int i= 0; i < fRules.length; i++) {
542 // token= (fRules[i].evaluate(this));
543 // if (!token.isUndefined())
548 // if (read() == EOF)
551 // return fDefaultReturnToken;