1 /**********************************************************************
2 Copyright (c) 2000, 2002 IBM Corp. and others.
3 All rights reserved. This program and the accompanying materials
4 are made available under the terms of the Common Public License v1.0
5 which accompanies this distribution, and is available at
6 http://www.eclipse.org/legal/cpl-v10.html
9 IBM Corporation - Initial implementation
10 Klaus Hartlage - www.eclipseproject.de
11 **********************************************************************/
12 package net.sourceforge.phpeclipse.phpeditor.php;
14 import java.io.CharArrayWriter;
15 import java.util.ArrayList;
16 import java.util.List;
18 import org.eclipse.jface.text.rules.ICharacterScanner;
19 import org.eclipse.jface.text.rules.IPredicateRule;
20 import org.eclipse.jface.text.rules.IToken;
21 import org.eclipse.jface.text.rules.IWordDetector;
22 import org.eclipse.jface.text.rules.MultiLineRule;
23 import org.eclipse.jface.text.rules.RuleBasedPartitionScanner;
24 import org.eclipse.jface.text.rules.Token;
25 import org.eclipse.jface.text.rules.WordRule;
28 * This scanner recognizes HTML multi-line comments and PHP code sections.
30 public class PHPPartitionScanner extends RuleBasedPartitionScanner {
// Partition content-type identifiers used as the data of the tokens declared below.
// NOTE(review): SKIP is not referenced anywhere in the visible part of this file.
32 private final static String SKIP = "__skip"; //$NON-NLS-1$
33 public final static String HTML_MULTILINE_COMMENT = "__html_multiline_comment"; //$NON-NLS-1$
34 // public final static String JAVA_DOC= "__java_javadoc"; //$NON-NLS-1$
35 public final static String PHP = "__php"; //$NON-NLS-1$
36 //public final static String HTML = "__html"; //$NON-NLS-1$
// Shared success tokens handed to the rules registered in the constructor.
38 public final static IToken php = new Token(PHP);
39 //public final static IToken html = new Token(HTML);
40 public final static IToken comment = new Token(HTML_MULTILINE_COMMENT);
// The three PHP opening tags ("<?", "<?php", "<?PHP") as character arrays.
// In the visible part of this file they are referenced only from commented-out code.
42 protected final static char[] php0EndSequence = { '<', '?' };
43 protected final static char[] php1EndSequence = { '<', '?', 'p', 'h', 'p' };
44 protected final static char[] php2EndSequence = { '<', '?', 'P', 'H', 'P' };
// NOTE(review): no use of this buffer is visible in this excerpt - confirm whether it is still needed.
46 private StringBuffer test;
// Multi-line rule used for PHP sections. It overrides the end-sequence search of
// MultiLineRule with a loop that has dedicated branches for '/'-introduced comments
// and for double- and single-quoted strings.
// NOTE(review): several lines of this class (local declarations, return statements,
// closing braces) are not visible in this excerpt; the comments below describe only
// what the visible lines show and mark everything else as an assumption.
48 public class PHPMultiLineRule extends MultiLineRule {
// Creates a rule from a start sequence, an end sequence and the token to report; delegates to MultiLineRule.
50 public PHPMultiLineRule(String startSequence, String endSequence, IToken token) {
51 super(startSequence, endSequence, token);
// Same as the three-argument constructor, additionally forwarding an escape character to MultiLineRule.
54 public PHPMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
55 super(startSequence, endSequence, token, escapeCharacter);
// Reads characters from the scanner while searching for this rule's end sequence.
// @param scanner the character scanner to read from
// @return presumably true once the end sequence was found - the return statements are not visible here, confirm
58 protected boolean endSequenceDetected(ICharacterScanner scanner) {
// NOTE(review): no read or write of these three flags is visible in this excerpt.
62 boolean lineCommentMode = false;
63 boolean multiLineCommentMode = false;
64 boolean stringMode = false;
// Line delimiters are needed for the fBreaksOnEOL check near the end of the loop.
66 char[][] delimiters = scanner.getLegalLineDelimiters();
// Main loop: one character per iteration until EOF.
67 while ((c = scanner.read()) != ICharacterScanner.EOF) {
// First branch (its condition is not visible here): consume the rest of the line,
// but still test for the end sequence on the way.
69 // read until end of line
70 while ((c = scanner.read()) != ICharacterScanner.EOF) {
71 if (fEndSequence.length > 0 && c == fEndSequence[0]) {
72 // Check if the specified end sequence has been found.
73 if (sequenceDetected(scanner, fEndSequence, true))
75 } else if (c == '\n') {
// A '/' is followed by a one-character look-ahead; the '*' case below starts a block comment,
// the other visible case reads to the end of the line (presumably a '//' comment - confirm).
80 } else if (c == '/' && (c = scanner.read()) != ICharacterScanner.EOF) {
82 // read until end of line
83 while ((c = scanner.read()) != ICharacterScanner.EOF) {
84 if (fEndSequence.length > 0 && c == fEndSequence[0]) {
85 // Check if the specified end sequence has been found.
86 if (sequenceDetected(scanner, fEndSequence, true))
88 } else if (c == '\n') {
// Block comment: read on; after a '*' the next character is read as well
// (presumably to test for the closing '/' - that check is not visible here).
93 } else if (c == '*') {
95 while ((c = scanner.read()) != ICharacterScanner.EOF) {
96 if (c == '*' && (c = scanner.read()) != ICharacterScanner.EOF) {
// Double-quoted string: read on until a further '"' is seen
// (handling of escaped quotes, if any, is on lines not visible here).
108 } else if (c == '"') {
110 while ((c = scanner.read()) != ICharacterScanner.EOF) {
113 } else if (c == '"') {
// Single-quoted string: same shape as the double-quoted case.
118 } else if (c == '\'') {
120 while ((c = scanner.read()) != ICharacterScanner.EOF) {
123 } else if (c == '\'') {
// Remaining checks on the current character: escape character, end sequence, line break.
130 if (c == fEscapeCharacter) {
131 // Skip the escaped character.
133 } else if (fEndSequence.length > 0 && c == fEndSequence[0]) {
134 // Check if the specified end sequence has been found.
135 if (sequenceDetected(scanner, fEndSequence, true))
137 } else if (fBreaksOnEOL) {
138 // Check for end of line since it can be used to terminate the pattern.
139 for (int i = 0; i < delimiters.length; i++) {
140 if (c == delimiters[i][0] && sequenceDetected(scanner, delimiters[i], false))
// After the loop: special handling when the scan stopped at EOF
// (the body of this branch and any use of phpMode are not visible here).
145 boolean phpMode = false;
146 if (c == ICharacterScanner.EOF) {
154 // public class HTMLMultiLineRule extends MultiLineRule {
156 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token) {
157 // super(startSequence, endSequence, token);
160 // public HTMLMultiLineRule(String startSequence, String endSequence, IToken token, char escapeCharacter) {
161 // super(startSequence, endSequence, token, escapeCharacter);
164 // protected boolean endSequenceDetected(ICharacterScanner scanner) {
167 // char[][] delimiters = scanner.getLegalLineDelimiters();
168 // while ((c = scanner.read()) != ICharacterScanner.EOF) {
170 // // scanner.unread();
171 // if (sequenceDetected(scanner, php2EndSequence, true)) {
180 // if (sequenceDetected(scanner, php1EndSequence, true)) {
189 // if (sequenceDetected(scanner, php0EndSequence, true)) {
195 // // scanner.read();
203 // protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
207 // if (endSequenceDetected(scanner))
212 // int c = scanner.read();
213 // // if (c == fStartSequence[0]) {
214 // // if (sequenceDetected(scanner, fStartSequence, false)) {
215 // if (endSequenceDetected(scanner))
222 // return Token.UNDEFINED;
225 // public IToken evaluate(ICharacterScanner scanner, boolean resume) {
226 // if (fColumn == UNDEFINED)
227 // return doEvaluate(scanner, resume);
229 // int c = scanner.read();
231 // // if (c == fStartSequence[0])
232 // return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
234 // // return Token.UNDEFINED;
238 // public class HTMLPatternRule implements IPredicateRule {
240 // protected static final int UNDEFINED = -1;
242 // /** The token to be returned on success */
243 // protected IToken fToken;
245 // /** The pattern's column constraint */
246 // protected int fColumn = UNDEFINED;
247 // /** The pattern's escape character */
248 // protected char fEscapeCharacter;
249 // /** Indicates whether end of line terminates the pattern */
250 // protected boolean fBreaksOnEOL;
253 // * Creates a rule for the given starting and ending sequence.
254 // * When these sequences are detected the rule will return the specified token.
255 // * Alternatively, the sequence can also be ended by the end of the line.
256 // * Any character which follows the given escapeCharacter will be ignored.
258 // * @param startSequence the pattern's start sequence
259 // * @param endSequence the pattern's end sequence, <code>null</code> is a legal value
260 // * @param token the token which will be returned on success
261 // * @param escapeCharacter any character following this one will be ignored
262 // * @param indicates whether the end of the line also terminates the pattern
264 // public HTMLPatternRule(IToken token) {
266 // fEscapeCharacter = (char) 0;
267 // fBreaksOnEOL = false;
271 // * Sets a column constraint for this rule. If set, the rule's token
272 // * will only be returned if the pattern is detected starting at the
273 // * specified column. If the column is smaller than 0, the column
274 // * constraint is considered removed.
276 // * @param column the column in which the pattern starts
278 // public void setColumnConstraint(int column) {
280 // column = UNDEFINED;
285 // * Evaluates this rule without considering any column constraints.
287 // * @param scanner the character scanner to be used
288 // * @return the token resulting from this evaluation
290 // protected IToken doEvaluate(ICharacterScanner scanner) {
291 // return doEvaluate(scanner, false);
295 // * Evaluates this rule without considering any column constraints. Resumes
296 // * detection, i.e. looks only for the end sequence required by this rule if the
297 // * <code>resume</code> flag is set.
299 // * @param scanner the character scanner to be used
300 // * @param resume <code>true</code> if detection should be resumed, <code>false</code> otherwise
301 // * @return the token resulting from this evaluation
304 // protected IToken doEvaluate(ICharacterScanner scanner, boolean resume) {
308 // if (endSequenceDetected(scanner))
313 // int c = scanner.read();
314 // // if (c == fStartSequence[0]) {
315 // // if (sequenceDetected(scanner, fStartSequence, false)) {
316 // if (endSequenceDetected(scanner))
323 // return Token.UNDEFINED;
327 // * @see IRule#evaluate
329 // public IToken evaluate(ICharacterScanner scanner) {
330 // return evaluate(scanner, false);
334 // * Returns whether the end sequence was detected. As the pattern can be considered
335 // * ended by a line delimiter, the result of this method is <code>true</code> if the
336 // * rule breaks on the end of the line, or if the EOF character is read.
338 // * @param scanner the character scanner to be used
339 // * @return <code>true</code> if the end sequence has been detected
341 // protected boolean endSequenceDetected(ICharacterScanner scanner) {
344 // char[][] delimiters = scanner.getLegalLineDelimiters();
345 // while ((c = scanner.read()) != ICharacterScanner.EOF) {
347 // // scanner.unread();
348 // if (sequenceDetected(scanner, php2EndSequence, true)) {
357 // if (sequenceDetected(scanner, php1EndSequence, true)) {
366 // if (sequenceDetected(scanner, php0EndSequence, true)) {
372 // // scanner.read();
381 // * Returns whether the next characters to be read by the character scanner
382 // * are an exact match with the given sequence. No escape characters are allowed
383 // * within the sequence. If specified the sequence is considered to be found
384 // * when reading the EOF character.
386 // * @param scanner the character scanner to be used
387 // * @param sequence the sequence to be detected
388 // * @param eofAllowed indicates whether EOF terminates the pattern
389 // * @return <code>true</code> if the given sequence has been detected
391 // protected boolean sequenceDetected(ICharacterScanner scanner, char[] sequence, boolean eofAllowed) {
392 // for (int i = 1; i < sequence.length; i++) {
393 // int c = scanner.read();
394 // if (c == ICharacterScanner.EOF && eofAllowed) {
396 // } else if (c != sequence[i]) {
397 // // Non-matching character detected, rewind the scanner back to the start.
399 // for (int j = i - 1; j > 0; j--)
409 // * @see IPredicateRule#evaluate(ICharacterScanner, boolean)
412 // public IToken evaluate(ICharacterScanner scanner, boolean resume) {
413 // if (fColumn == UNDEFINED)
414 // return doEvaluate(scanner, resume);
416 // int c = scanner.read();
418 // // if (c == fStartSequence[0])
419 // return (fColumn == scanner.getColumn() ? doEvaluate(scanner, resume) : Token.UNDEFINED);
421 // // return Token.UNDEFINED;
425 // * @see IPredicateRule#getSuccessToken()
428 // public IToken getSuccessToken() {
433 * Detector for empty comments.
435 // static class EmptyCommentDetector implements IWordDetector {
438 // * Method declared on IWordDetector
440 // public boolean isWordStart(char c) {
441 // return (c == '/');
445 // * Method declared on IWordDetector
447 // public boolean isWordPart(char c) {
448 // return (c == '*' || c == '/');
455 // static class WordPredicateRule extends WordRule implements IPredicateRule {
457 // private IToken fSuccessToken;
459 // public WordPredicateRule(IToken successToken) {
460 // super(new EmptyCommentDetector());
461 // fSuccessToken = successToken;
462 // addWord("/**/", fSuccessToken);
466 // * @see org.eclipse.jface.text.rules.IPredicateRule#evaluate(ICharacterScanner, boolean)
468 // public IToken evaluate(ICharacterScanner scanner, boolean resume) {
469 // return super.evaluate(scanner);
473 // * @see org.eclipse.jface.text.rules.IPredicateRule#getSuccessToken()
475 // public IToken getSuccessToken() {
476 // return fSuccessToken;
481 * Creates the partitioner and sets up the appropriate rules.
483 public PHPPartitionScanner() {
// The tokens are class-level constants now; the local declarations below are kept commented out.
486 // IToken php = new Token(PHP);
487 // IToken html = new Token(HTML);
488 // IToken comment = new Token(HTML_MULTILINE_COMMENT);
// Collects the predicate rules before they are installed at the end of the constructor.
490 List rules = new ArrayList();
492 // Add rule for single line comments.
493 // rules.add(new EndOfLineRule("//", Token.UNDEFINED));
495 // Add rule for strings and character constants.
496 // rules.add(new SingleLineRule("\"", "\"", Token.UNDEFINED, '\\'));
497 // rules.add(new SingleLineRule("'", "'", Token.UNDEFINED, '\\'));
499 // Add special case word rule.
500 // rules.add(new WordPredicateRule(comment));
502 // Add rules for multi-line comments and javadoc.
503 //rules.add(new MultiLineRule("/**", "*/", javaDoc));
504 // rules.add(new HTMLMultiLineRule("<", "<?", html));
// HTML comments: everything from "<!--" to "-->" is reported with the comment token.
506 rules.add(new MultiLineRule("<!--", "-->", comment));
// PHP sections: "<?" followed by CR, LF, tab or space, or the tags "<?php" / "<?PHP";
// each rule ends at "?>" and reports the php token.
507 rules.add(new PHPMultiLineRule("<?\r", "?>", php));
508 rules.add(new PHPMultiLineRule("<?\n", "?>", php));
509 rules.add(new PHPMultiLineRule("<?\t", "?>", php));
510 rules.add(new PHPMultiLineRule("<? ", "?>", php));
511 rules.add(new PHPMultiLineRule("<?php", "?>", php));
512 rules.add(new PHPMultiLineRule("<?PHP", "?>", php));
514 // rules.add(new HTMLPatternRule(html)); // "<", "<?",
515 //Add rule for processing instructions
// Copy the collected rules into an array and install them as this scanner's predicate rules.
517 IPredicateRule[] result = new IPredicateRule[rules.size()];
518 rules.toArray(result);
519 setPredicateRules(result);
520 // setDefaultReturnToken(html);
523 // public IToken nextToken() {
525 // if (fContentType == null || fRules == null)
526 // return getNextToken();
528 // fTokenOffset= fOffset;
529 // fColumn= UNDEFINED;
530 // boolean resume= (fPartitionOffset < fOffset);
532 // IPredicateRule rule;
535 // for (int i= 0; i < fRules.length; i++) {
536 // rule= (IPredicateRule) fRules[i];
537 // token= rule.getSuccessToken();
538 // if (fContentType.equals(token.getData())) {
540 // fTokenOffset= fPartitionOffset;
541 // token= rule.evaluate(this, resume);
542 // if (!token.isUndefined()) {
543 // fContentType= null;
549 // fContentType= null;
550 // return getNextToken();
553 // public IToken getNextToken() {
559 // fTokenOffset= fOffset;
560 // fColumn= UNDEFINED;
562 // if (fRules != null) {
563 // for (int i= 0; i < fRules.length; i++) {
564 // token= (fRules[i].evaluate(this));
565 // if (!token.isUndefined())
570 // if (read() == EOF)
573 // return fDefaultReturnToken;