1 /**********************************************************************
2 Copyright (c) 2000, 2002 IBM Corp. and others.
3 All rights reserved. This program and the accompanying materials
4 are made available under the terms of the Common Public License v1.0
5 which accompanies this distribution, and is available at
6 http://www.eclipse.org/legal/cpl-v10.html
9 IBM Corporation - Initial implementation
10 Klaus Hartlage - www.eclipseproject.de
11 **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
14 import java.io.CharArrayWriter;
15 import java.util.ArrayList;
16 import java.util.List;
18 import org.eclipse.jface.text.rules.ICharacterScanner;
19 import org.eclipse.jface.text.rules.IPredicateRule;
20 import org.eclipse.jface.text.rules.IToken;
21 import org.eclipse.jface.text.rules.IWordDetector;
22 import org.eclipse.jface.text.rules.MultiLineRule;
23 import org.eclipse.jface.text.rules.RuleBasedPartitionScanner;
24 import org.eclipse.jface.text.rules.Token;
25 import org.eclipse.jface.text.rules.WordRule;
28 * This scanner recognizes HTML multi-line comments and PHP code sections.
30 public class PHPPartitionScanner extends RuleBasedPartitionScanner {
// Content-type identifiers for the partitions, and the shared tokens that carry them.
32 private final static String SKIP = "__skip"; //$NON-NLS-1$
// Content type carried by the comment token declared below.
33 public final static String HTML_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$
34 // public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
// Content type carried by the php token declared below.
35 public final static String PHP = "__php";
36 // public final static String HTML = "__html";
// Token handed to the PHP rules registered in the constructor.
38 public final static IToken php = new Token(PHP);
39 // public final static IToken html = new Token(HTML);
// Token handed to the HTML comment rule registered in the constructor.
40 public final static IToken comment = new Token(HTML_MULTILINE_COMMENT);
// PHP open-tag character sequences (short form, lower-case long form, upper-case long form).
// HTMLPatternRule.endSequenceDetected passes them to sequenceDetected.
42 protected final static char[] php0EndSequence = { '<', '?' };
43 protected final static char[] php1EndSequence = { '<', '?', 'p', 'h', 'p' };
44 protected final static char[] php2EndSequence = { '<', '?', 'P', 'H', 'P' };
// NOTE(review): no use of this buffer is visible in this part of the file - confirm whether it is still needed.
46 private StringBuffer test;
// MultiLineRule variant used for the PHP partition rules; it replaces the
// inherited end-sequence search with its own character-by-character scan.
48 public class PHPMultiLineRule extends MultiLineRule {
// Both constructors only forward their arguments to MultiLineRule.
50 public PHPMultiLineRule(String startSequence, String endSequence, IToken token) {
51 super(startSequence, endSequence, token);
54 public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
55 super(startSequence, endSequence, token, escapeCharacter);
// Scans forward from the current scanner position looking for fEndSequence.
// Separate inner loops handle line comments, block comments and quoted strings.
// NOTE(review): the guarding conditions and return statements of several
// branches are not part of this excerpt; comments cover visible statements only.
58 protected boolean endSequenceDetected(ICharacterScanner scanner) {
// NOTE(review): no read of these three flags appears in the visible code - confirm they are used.
62 boolean lineCommentMode = false;
63 boolean multiLineCommentMode = false;
64 boolean stringMode = false;
// Line delimiters are only consulted in the fBreaksOnEOL branch further down.
66 char[][] delimiters = scanner.getLegalLineDelimiters();
// Outer loop: consume characters until the scanner reports EOF.
67 while ((c = scanner.read()) != ICharacterScanner.EOF) {
69 // read until end of line
// Inner loop for a line comment: the end sequence is still checked on every
// character, and a line feed is tested for as well.
70 while ((c = scanner.read()) != ICharacterScanner.EOF) {
71 if (fEndSequence.length > 0 && c == fEndSequence[0]) {
72 // Check if the specified end sequence has been found.
73 if (sequenceDetected(scanner, fEndSequence, true))
75 } else if (c == '\n') {
// A slash consumes one more character so the kind of comment can be decided.
80 } else if (c == '/' && (c = scanner.read()) != ICharacterScanner.EOF) {
82 // read until end of line
// Same scan as the first inner loop: look for the end sequence or a line feed.
83 while ((c = scanner.read()) != ICharacterScanner.EOF) {
84 if (fEndSequence.length > 0 && c == fEndSequence[0]) {
85 // Check if the specified end sequence has been found.
86 if (sequenceDetected(scanner, fEndSequence, true))
88 } else if (c == '\n') {
// Slash followed by a star: read on, and after each star read one more character.
// The end sequence is not tested inside this loop in the visible code.
93 } else if (c == '*') {
95 while ((c = scanner.read()) != ICharacterScanner.EOF) {
96 if (c == '*' && (c = scanner.read()) != ICharacterScanner.EOF) {
// Double-quoted string: read on; a second double quote is tested for below.
108 } else if (c == '"') {
110 while ((c = scanner.read()) != ICharacterScanner.EOF) {
113 } else if (c == '"') {
// Single-quoted string: same scan, with the single quote tested for below.
118 } else if (c == '\'') {
120 while ((c = scanner.read()) != ICharacterScanner.EOF) {
123 } else if (c == '\'') {
// Generic handling for any other character: escape character, end sequence,
// and optionally line delimiters.
130 if (c == fEscapeCharacter) {
131 // Skip the escaped character.
133 } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
134 // Check if the specified end sequence has been found.
135 if (sequenceDetected(scanner, fEndSequence, true))
137 } else if (fBreaksOnEOL) {
138 // Check for end of line since it can be used to terminate the pattern.
139 for (int i = 0; i < delimiters.length; i++) {
140 if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false))
// Reached once the outer loop has ended; EOF is then handled explicitly.
145 boolean phpMode = false;
146 if (c == ICharacterScanner.EOF) {
154 // public class HTMLMultiLineRule extends MultiLineRule {
156 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token) {
157 // super(startSequence, endSequence, token);
160 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
161 // super(startSequence, endSequence, token, escapeCharacter);
164 // protected boolean endSequenceDetected(ICharacterScanner scanner) {
167 // char[][] delimiters = scanner.getLegalLineDelimiters();
168 // while ((c = scanner.read()) != ICharacterScanner.EOF) {
170 // // scanner.unread();
171 // if (sequenceDetected(scanner, php2EndSequence, true)) {
180 // if (sequenceDetected(scanner, php1EndSequence, true)) {
189 // if (sequenceDetected(scanner, php0EndSequence, true)) {
195 // // scanner.read();
203 // protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
207 // if (endSequenceDetected(scanner))
212 // int c = scanner.read();
213 // // if (c == fStartSequence[0]) {
214 // // if (sequenceDetected(scanner, fStartSequence, false)) {
215 // if (endSequenceDetected(scanner))
222 // return Token.UNDEFINED;
225 // public IToken evaluate(ICharacterScanner scanner, boolean resume) {
226 // if (fColumn == UNDEFINED)
227 // return doEvaluate(scanner, resume);
229 // int c = scanner.read();
231 // // if (c == fStartSequence[0])
232 // return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
234 // // return Token.UNDEFINED;
238 public class HTMLPatternRule implements IPredicateRule {
// Sentinel value of fColumn meaning that no column constraint is set.
240 protected static final int UNDEFINED = -1;
242 /** The token to be returned on success */
243 protected IToken fToken;
245 /** The pattern's column constraint */
246 protected int fColumn = UNDEFINED;
247 /** The pattern's escape character */
248 protected char fEscapeCharacter;
249 /** Indicates whether end of line terminates the pattern */
250 protected boolean fBreaksOnEOL;
253 * Creates a rule for the given starting and ending sequence.
254 * When these sequences are detected the rule will return the specified token.
255 * Alternatively, the sequence can also be ended by the end of the line.
256 * Any character which follows the given escapeCharacter will be ignored.
258 * @param startSequence the pattern's start sequence
259 * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
260 * @param token the token which will be returned on success
261 * @param escapeCharacter any character following this one will be ignored
262 * @param indicates whether the end of the line also terminates the pattern
264 public HTMLPatternRule(IToken token) {
// This rule takes only a token: the escape character is fixed to char 0
// and the rule never breaks on end of line.
266 fEscapeCharacter = (char) 0;
267 fBreaksOnEOL = false;
271 * Sets a column constraint for this rule. If set, the rule's token
272 * will only be returned if the pattern is detected starting at the
273 * specified column. If the column is smaller than 0, the column
274 * constraint is considered removed.
276 * @param column the column in which the pattern starts
278 public void setColumnConstraint(int column) {
285 * Evaluates this rule without considering any column constraints.
287 * @param scanner the character scanner to be used
288 * @return the token resulting from this evaluation
290 protected IToken doEvaluate(ICharacterScanner scanner) {
// Convenience overload: forwards to doEvaluate(scanner, resume) with resume set to false.
291 return doEvaluate(scanner, false);
295 * Evaluates this rule without considering any column constraints. Resumes
296 * detection, i.e. looks only for the end sequence required by this rule if the
297 * <code>resume</code> flag is set.
299 * @param scanner the character scanner to be used
300 * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
301 * @return the token resulting from this evaluation
304 protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
// First visible call: search for the end of the pattern from the current position.
// NOTE(review): the condition selecting between the two calls (presumably the
// resume flag) is not part of this excerpt - confirm.
308 if (endSequenceDetected(scanner))
// Second visible call: one character is consumed first. The start-sequence
// checks below are commented out, so no start sequence is matched here.
313 int c = scanner.read();
314 // if (c == fStartSequence[0]) {
315 // if (sequenceDetected(scanner, fStartSequence, false)) {
316 if (endSequenceDetected(scanner))
// Fallback result of this method.
323 return Token.UNDEFINED;
327 * @see IRule#evaluate
329 public IToken evaluate(ICharacterScanner scanner) {
// Non-resuming evaluation: forwards to evaluate(scanner, resume) with resume set to false.
330 return evaluate(scanner, false);
334 * Returns whether the end sequence was detected. As the pattern can be considered
335 * ended by a line delimiter, the result of this method is <code>true</code> if the
336 * rule breaks on the end of the line, or if the EOF character is read.
338 * @param scanner the character scanner to be used
339 * @return <code>true</code> if the end sequence has been detected
341 protected boolean endSequenceDetected(ICharacterScanner scanner) {
// NOTE(review): no use of delimiters appears in the visible code of this method - confirm.
344 char[][] delimiters = scanner.getLegalLineDelimiters();
// Consume characters until EOF, testing at each step for a PHP open tag.
345 while ((c = scanner.read()) != ICharacterScanner.EOF) {
// The two five-character open tags (upper case, then lower case) are tried
// before the two-character short tag.
348 if (sequenceDetected(scanner, php2EndSequence, true)) {
357 if (sequenceDetected(scanner, php1EndSequence, true)) {
366 if (sequenceDetected(scanner, php0EndSequence, true)) {
381 * Returns whether the next characters to be read by the character scanner
382 * are an exact match with the given sequence. No escape characters are allowed
383 * within the sequence. If specified the sequence is considered to be found
384 * when reading the EOF character.
386 * @param scanner the character scanner to be used
387 * @param sequence the sequence to be detected
388 * @param eofAllowed indicates whether EOF terminates the pattern
389 * @return <code>true</code> if the given sequence has been detected
391 protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
// Comparison starts at index 1; sequence[0] is never compared in this method.
392 for (int i = 1; i < sequence.length; i++) {
393 int c = scanner.read();
// EOF is handled in its own branch, and only when the caller allows it.
394 if (c == ICharacterScanner.EOF && eofAllowed) {
396 } else if (c != sequence[i]) {
397 // Non-matching character detected, rewind the scanner back to the start.
// Counts down from i - 1 to 1, i.e. one step per previously matched character.
399 for (int j = i - 1; j > 0; j--)
409 * @see IPredicateRule#evaluate(ICharacterScanner, boolean)
412 public IToken evaluate(ICharacterScanner scanner, boolean resume) {
// Without a column constraint the rule is evaluated directly.
413 if (fColumn == UNDEFINED)
414 return doEvaluate(scanner, resume);
// With a constraint, one character is read and the rule is evaluated only
// when the scanner column equals fColumn; otherwise the result is UNDEFINED.
416 int c = scanner.read();
418 // if (c == fStartSequence[0])
419 return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
421 // return Token.UNDEFINED;
425 * @see IPredicateRule#getSuccessToken()
428 public IToken getSuccessToken() {
433 * Detector for empty comments.
435 static class EmptyCommentDetector implements IWordDetector {
// Word detector handed to WordRule by WordPredicateRule.
438 * Method declared on IWordDetector
440 public boolean isWordStart(char c) {
445 * Method declared on IWordDetector
447 public boolean isWordPart(char c) {
// Only the star and the slash count as word characters.
448 return (c == '*' || c == '/');
455 static class WordPredicateRule extends WordRule implements IPredicateRule {
// Adapts a WordRule to the IPredicateRule interface; its single registered
// word is the empty block comment. The only reference to this class in the
// scanner constructor is commented out.
457 private IToken fSuccessToken;
459 public WordPredicateRule(IToken successToken) {
// The detector restricts words to runs of stars and slashes.
460 super(new EmptyCommentDetector());
461 fSuccessToken = successToken;
// Register the one word that maps to the success token.
462 addWord("/**/", fSuccessToken);
466 * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
468 public IToken evaluate(ICharacterScanner scanner, boolean resume) {
// The resume flag is ignored; evaluation is delegated to WordRule.
469 return super.evaluate(scanner);
473 * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
475 public IToken getSuccessToken() {
476 return fSuccessToken;
481 * Creates the partitioner and sets up the appropriate rules.
483 public PHPPartitionScanner() {
// Builds the list of predicate rules and installs it on this scanner.
// Only the HTML comment rule and the six PHP rules are active; every other
// rule below is commented out.
486 // IToken php = new Token(PHP);
487 // IToken html = new Token(HTML);
488 // IToken comment = new Token(HTML_MULTILINE_COMMENT);
490 List rules = new ArrayList();
492 // Add rule for single line comments.
493 // rules.add(new EndOfLineRule("//", Token.UNDEFINED));
495 // Add rule for strings and character constants.
496 // rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
497 // rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\'));
499 // Add special case word rule.
500 // rules.add(new WordPredicateRule(comment));
502 // Add rules for multi-line comments and javadoc.
503 //rules.add(new MultiLineRule("/**", "*/", javaDoc));
504 // rules.add(new HTMLMultiLineRule("<", "<?", html));
// HTML comments are returned with the comment token.
506 rules.add(new MultiLineRule("<!--", "-->", comment));
// PHP sections: the short open tag followed by CR, LF, tab or space, or the
// long open tag in lower or upper case. All of them use the same end
// sequence and return the php token.
507 rules.add(new PHPMultiLineRule("<?\r", "?>", php));
508 rules.add(new PHPMultiLineRule("<?\n", "?>", php));
509 rules.add(new PHPMultiLineRule("<?\t", "?>", php));
510 rules.add(new PHPMultiLineRule("<? ", "?>", php));
511 rules.add(new PHPMultiLineRule("<?php", "?>", php));
512 rules.add(new PHPMultiLineRule("<?PHP", "?>", php));
514 // rules.add(new HTMLPatternRule(html)); // "<", "<?",
515 //Add rule for processing instructions
// Copy the list into a typed array, as required by setPredicateRules.
517 IPredicateRule[] result = new IPredicateRule[rules.size()];
518 rules.toArray(result);
519 setPredicateRules(result);
520 // setDefaultReturnToken(html);
523 // public IToken nextToken() {
525 // if (fContentType == null || fRules == null)
526 // return getNextToken();
528 // fTokenOffset= fOffset;
529 // fColumn= UNDEFINED;
530 // boolean resume= (fPartitionOffset < fOffset);
532 // IPredicateRule rule;
535 // for (int i= 0; i < fRules.length; i++) {
536 // rule= (IPredicateRule) fRules[i];
537 // token= rule.getSuccessToken();
538 // if (fContentType.equals(token.getData())) {
540 // fTokenOffset= fPartitionOffset;
541 // token= rule.evaluate(this, resume);
542 // if (!token.isUndefined()) {
543 // fContentType= null;
549 // fContentType= null;
550 // return getNextToken();
553 // public IToken getNextToken() {
559 // fTokenOffset= fOffset;
560 // fColumn= UNDEFINED;
562 // if (fRules != null) {
563 // for (int i= 0; i < fRules.length; i++) {
564 // token= (fRules[i].evaluate(this));
565 // if (!token.isUndefined())
570 // if (read() == EOF)
573 // return fDefaultReturnToken;