1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
13 import java.util.Arrays;
15 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
16 import net.sourceforge.phpeclipse.phpeditor.php.PHPDocumentPartitioner;
19 //import org.eclipse.jface.text.Assert;
20 import org.eclipse.core.runtime.Assert;
21 import org.eclipse.jface.text.BadLocationException;
22 import org.eclipse.jface.text.IDocument;
23 //import org.eclipse.jface.text.IRegion;
24 import org.eclipse.jface.text.ITypedRegion;
25 //import org.eclipse.jface.text.Region;
26 import org.eclipse.jface.text.TextUtilities;
29 * Utility methods for heuristic based Java manipulations in an incomplete Java
33 * An instance holds some internal position in the document and is therefore not
39 public class JavaHeuristicScanner implements Symbols {
41 * Returned by all methods when the requested position could not be found,
42 * or if a {@link BadLocationException} was thrown while scanning.
44 public static final int NOT_FOUND = -1;
47 * Special bound parameter that means either -1 (backward scanning) or
48 * <code>fDocument.getLength()</code> (forward scanning).
50 public static final int UNBOUND = -2;
52 /* character constants */
53 private static final char LBRACE = '{';
55 private static final char RBRACE = '}';
57 private static final char LPAREN = '(';
59 private static final char RPAREN = ')';
61 private static final char SEMICOLON = ';';
63 private static final char COLON = ':';
65 private static final char COMMA = ',';
67 private static final char LBRACKET = '[';
69 private static final char RBRACKET = ']';
71 private static final char QUESTIONMARK = '?';
73 private static final char EQUAL = '=';
76 * Specifies the stop condition, upon which the <code>scanXXX</code>
77 * methods will decide whether to keep scanning or not. This interface may
78 * be implemented by clients.
80 public interface StopCondition {
82 * Instructs the scanner to return the current position.
85 * the char at the current position
87 * the current position
89 * the iteration direction
90 * @return <code>true</code> if the stop condition is met.
92 boolean stop(char ch, int position, boolean forward);
96 * Stops upon a non-whitespace (as defined by
97 * {@link Character#isWhitespace(char)}) character.
99 private static class NonWhitespace implements StopCondition {
101 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
103 public boolean stop(char ch, int position, boolean forward) {
104 return !Character.isWhitespace(ch);
109 * Stops upon a non-whitespace character in the default partition.
113 private class NonWhitespaceDefaultPartition extends NonWhitespace {
115 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
117 public boolean stop(char ch, int position, boolean forward) {
118 return super.stop(ch, position, true)
119 && isDefaultPartition(position);
124 * Stops upon a non-java identifier (as defined by
125 * {@link Scanner#isPHPIdentifierPart(char)}) character.
127 private static class NonJavaIdentifierPart implements StopCondition {
129 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
131 public boolean stop(char ch, int position, boolean forward) {
132 return !Scanner.isPHPIdentifierPart(ch);
137 * Stops upon a non-java identifier character, or upon any character that lies outside the default partition.
139 * @see NonJavaIdentifierPart
141 private class NonJavaIdentifierPartDefaultPartition extends
142 NonJavaIdentifierPart {
144 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char, int, boolean)
146 public boolean stop(char ch, int position, boolean forward) {
147 return super.stop(ch, position, true)
148 || !isDefaultPartition(position);
153 * Stops upon a character in the default partition that matches the given
156 private class CharacterMatch implements StopCondition {
157 private final char[] fChars;
160 * Creates a new instance.
163 * the single character to match
165 public CharacterMatch(char ch) {
166 this(new char[] { ch });
170 * Creates a new instance.
173 * the chars to match.
175 public CharacterMatch(char[] chars) {
176 Assert.isNotNull(chars);
177 Assert.isTrue(chars.length > 0);
183 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char,
186 public boolean stop(char ch, int position, boolean forward) {
187 return Arrays.binarySearch(fChars, ch) >= 0
188 && isDefaultPartition(position);
193 * Acts like character match, but skips all scopes introduced by
194 * parenthesis, brackets, and braces.
196 protected class SkippingScopeMatch extends CharacterMatch {
197 private char fOpening, fClosing;
199 private int fDepth = 0;
202 * Creates a new instance.
205 * the single character to match
207 public SkippingScopeMatch(char ch) {
212 * Creates a new instance.
215 * the chars to match.
217 public SkippingScopeMatch(char[] chars) {
222 * @see net.sourceforge.phpdt.internal.ui.text.JavaHeuristicScanner.StopCondition#stop(char,
225 public boolean stop(char ch, int position, boolean forward) {
227 if (fDepth == 0 && super.stop(ch, position, true))
229 else if (ch == fOpening)
231 else if (ch == fClosing) {
237 } else if (fDepth == 0) {
281 /** The document being scanned. */
282 private IDocument fDocument;
284 /** The partitioning being used for scanning. */
285 private String fPartitioning;
287 /** The partition to scan in. */
288 private String fPartition;
290 /* internal scan state */
292 /** the most recently read character. */
295 /** the most recently read position. */
298 /* preset stop conditions */
299 private final StopCondition fNonWSDefaultPart = new NonWhitespaceDefaultPartition();
301 private final static StopCondition fNonWS = new NonWhitespace();
303 private final StopCondition fNonIdent = new NonJavaIdentifierPartDefaultPartition();
306 * Creates a new instance.
309 * the document to scan
310 * @param partitioning
311 * the partitioning to use for scanning
313 * the partition to scan in
315 public JavaHeuristicScanner(IDocument document, String partitioning,
317 Assert.isNotNull(document);
318 Assert.isNotNull(partitioning);
319 Assert.isNotNull(partition);
320 fDocument = document;
321 fPartitioning = partitioning;
322 fPartition = partition;
327 * <code>this(document, IPHPPartitions.PHP_PARTITIONING, PHPDocumentPartitioner.PHP_SCRIPT_CODE)</code>.
330 * the document to scan.
332 public JavaHeuristicScanner(IDocument document) {
333 // this(document, IPHPPartitions.PHP_PARTITIONING,
334 // IDocument.DEFAULT_CONTENT_TYPE);
335 this(document, IPHPPartitions.PHP_PARTITIONING,
336 PHPDocumentPartitioner.PHP_SCRIPT_CODE);
340 * Returns the most recent internal scan position.
342 * @return the most recent internal scan position.
344 public int getPosition() {
349 * Returns the next token in forward direction, starting at
350 * <code>start</code>, and not extending further than <code>bound</code>.
351 * The return value is one of the constants defined in {@link Symbols}.
352 * After a call, {@link #getPosition()} will return the position just after
353 * the scanned token (i.e. the next position that will be scanned).
356 * the first character position in the document to consider
358 * the first position not to consider any more
359 * @return a constant from {@link Symbols} describing the next token
361 public int nextToken(int start, int bound) {
362 int pos = scanForward(start, bound, fNonWSDefaultPart);
363 if (pos == NOT_FOUND)
374 return TokenLBRACKET;
376 return TokenRBRACKET;
382 return TokenSEMICOLON;
386 return TokenQUESTIONMARK;
389 if (fDocument.getChar(fPos) == '>') {
393 } catch (BadLocationException e) {
398 if (fDocument.get(fPos, 4).equalsIgnoreCase("?php")) {
401 } else if (fDocument.getChar(fPos) == '?') {
405 } catch (BadLocationException e) {
410 if (Scanner.isPHPIdentifierPart(fChar)) {
411 // assume an ident or keyword
413 pos = scanForward(pos + 1, bound, fNonIdent);
414 if (pos == NOT_FOUND)
415 to = bound == UNBOUND ? fDocument.getLength() : bound;
419 String identOrKeyword;
421 identOrKeyword = fDocument.get(from, to - from);
422 } catch (BadLocationException e) {
426 return getToken(identOrKeyword);
429 // operators, number literals etc
435 * Returns the next token in backward direction, starting at
436 * <code>start</code>, and not extending further than <code>bound</code>.
437 * The return value is one of the constants defined in {@link Symbols}.
438 * After a call, {@link #getPosition()} will return the position just before
439 * the scanned token starts (i.e. the next position that will be scanned).
442 * the first character position in the document to consider
444 * the first position not to consider any more
445 * @return a constant from {@link Symbols} describing the previous token
447 public int previousToken(int start, int bound) {
448 int pos = scanBackward(start, bound, fNonWSDefaultPart);
449 if (pos == NOT_FOUND)
460 return TokenLBRACKET;
462 return TokenRBRACKET;
468 return TokenSEMICOLON;
474 return TokenQUESTIONMARK;
479 switch (fDocument.getChar(fPos)) {
487 } catch (BadLocationException e) {
492 if (Scanner.isPHPIdentifierPart(fChar)) {
493 // assume an ident or keyword
494 int from, to = pos + 1;
495 pos = scanBackward(pos - 1, bound, fNonIdent);
496 if (pos == NOT_FOUND)
497 from = bound == UNBOUND ? 0 : bound + 1;
501 String identOrKeyword;
503 identOrKeyword = fDocument.get(from, to - from);
504 } catch (BadLocationException e) {
508 return getToken(identOrKeyword);
511 // operators, number literals etc
518 * Returns one of the keyword constants or <code>TokenIDENT</code> for a
519 * scanned identifier.
522 * a scanned identifier
523 * @return one of the constants defined in {@link Symbols}
525 private int getToken(String s) {
528 switch (s.length()) {
530 if ("if".equals(s)) //$NON-NLS-1$
532 if ("do".equals(s)) //$NON-NLS-1$
536 if ("for".equals(s)) //$NON-NLS-1$
538 if ("try".equals(s)) //$NON-NLS-1$
540 if ("new".equals(s)) //$NON-NLS-1$
544 if ("case".equals(s)) //$NON-NLS-1$
546 if ("else".equals(s)) //$NON-NLS-1$
548 if ("goto".equals(s)) //$NON-NLS-1$
552 if ("break".equals(s)) //$NON-NLS-1$
554 if ("catch".equals(s)) //$NON-NLS-1$
556 if ("while".equals(s)) //$NON-NLS-1$
560 if ("return".equals(s)) //$NON-NLS-1$
562 if ("static".equals(s)) //$NON-NLS-1$
564 if ("switch".equals(s)) //$NON-NLS-1$
568 if ("default".equals(s)) //$NON-NLS-1$
570 if ("finally".equals(s)) //$NON-NLS-1$
574 if ("synchronized".equals(s)) //$NON-NLS-1$
575 return TokenSYNCHRONIZED;
582 * Returns the position of the closing peer character (forward search). Any
583 * scopes introduced by opening peers are skipped. All peers accounted for
584 * must reside in the default partition.
587 * Note that <code>start</code> must not point to the opening peer, but to
588 * the first character being searched.
594 * the opening peer character (e.g. '{')
596 * the closing peer character (e.g. '}')
597 * @return the matching peer character position, or <code>NOT_FOUND</code>
599 public int findClosingPeer(int start, final char openingPeer,
600 final char closingPeer) {
601 Assert.isNotNull(fDocument);
602 Assert.isTrue(start >= 0);
608 start = scanForward(start + 1, UNBOUND, new CharacterMatch(
609 new char[] { openingPeer, closingPeer }));
610 if (start == NOT_FOUND)
613 if (fDocument.getChar(start) == openingPeer)
622 } catch (BadLocationException e) {
628 * Returns the position of the opening peer character (backward search). Any
629 * scopes introduced by closing peers are skipped. All peers accounted for
630 * must reside in the default partition.
633 * Note that <code>start</code> must not point to the closing peer, but to
634 * the first character being searched.
640 * the opening peer character (e.g. '{')
642 * the closing peer character (e.g. '}')
643 * @return the matching peer character position, or <code>NOT_FOUND</code>
645 public int findOpeningPeer(int start, char openingPeer, char closingPeer) {
646 Assert.isTrue(start < fDocument.getLength());
652 start = scanBackward(start - 1, UNBOUND, new CharacterMatch(
653 new char[] { openingPeer, closingPeer }));
654 if (start == NOT_FOUND)
657 if (fDocument.getChar(start) == closingPeer)
666 } catch (BadLocationException e) {
672 * Computes the surrounding block around <code>offset</code>. The search
673 * is started at the beginning of <code>offset</code>, i.e. an opening
674 * brace at <code>offset</code> will not be part of the surrounding block,
675 * but a closing brace will.
678 * the offset for which the surrounding block is computed
679 * @return a region describing the surrounding block, or <code>null</code>
680 * if none can be found
682 // public IRegion findSurroundingBlock(int offset) {
683 // if (offset < 1 || offset >= fDocument.getLength())
686 // int begin = findOpeningPeer(offset - 1, LBRACE, RBRACE);
687 // int end = findClosingPeer(offset, LBRACE, RBRACE);
688 // if (begin == NOT_FOUND || end == NOT_FOUND)
690 // return new Region(begin, end + 1 - begin);
694 * Finds the smallest position in <code>fDocument</code> such that the
695 * position is >= <code>position</code> and < <code>bound</code>
696 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
697 * evaluates to <code>false</code> and the position is in the default
701 * the first character position in <code>fDocument</code> to be
704 * the first position in <code>fDocument</code> to not consider
705 * any more, with <code>bound</code> > <code>position</code>,
706 * or <code>UNBOUND</code>
707 * @return the smallest position of a non-whitespace character in [<code>position</code>,
708 * <code>bound</code>) that resides in a Java partition, or
709 * <code>NOT_FOUND</code> if none can be found
711 // public int findNonWhitespaceForward(int position, int bound) {
712 // return scanForward(position, bound, fNonWSDefaultPart);
716 * Finds the smallest position in <code>fDocument</code> such that the
717 * position is >= <code>position</code> and < <code>bound</code>
718 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
719 * evaluates to <code>false</code>.
722 * the first character position in <code>fDocument</code> to be
725 * the first position in <code>fDocument</code> to not consider
726 * any more, with <code>bound</code> > <code>position</code>,
727 * or <code>UNBOUND</code>
728 * @return the smallest position of a non-whitespace character in [<code>position</code>,
729 * <code>bound</code>), or <code>NOT_FOUND</code> if none can
732 public int findNonWhitespaceForwardInAnyPartition(int position, int bound) {
733 return scanForward(position, bound, fNonWS);
737 * Finds the highest position in <code>fDocument</code> such that the
738 * position is <= <code>position</code> and > <code>bound</code>
739 * and <code>Character.isWhitespace(fDocument.getChar(pos))</code>
740 * evaluates to <code>false</code> and the position is in the default
744 * the first character position in <code>fDocument</code> to be
747 * the first position in <code>fDocument</code> to not consider
748 * any more, with <code>bound</code> < <code>position</code>,
749 * or <code>UNBOUND</code>
750 * @return the highest position of a non-whitespace character in (<code>bound</code>,
751 * <code>position</code>] that resides in a Java partition, or
752 * <code>NOT_FOUND</code> if none can be found
754 // public int findNonWhitespaceBackward(int position, int bound) {
755 // return scanBackward(position, bound, fNonWSDefaultPart);
759 * Finds the lowest position <code>p</code> in <code>fDocument</code>
760 * such that <code>start</code> <= p < <code>bound</code> and
761 * <code>condition.stop(fDocument.getChar(p), p, true)</code> evaluates to
765 * the first character position in <code>fDocument</code> to be
768 * the first position in <code>fDocument</code> to not consider
769 * any more, with <code>bound</code> > <code>start</code>,
770 * or <code>UNBOUND</code>
772 * the <code>StopCondition</code> to check
773 * @return the lowest position in [<code>start</code>,
774 * <code>bound</code>) for which <code>condition</code> holds,
775 * or <code>NOT_FOUND</code> if none can be found
777 public int scanForward(int start, int bound, StopCondition condition) {
778 Assert.isTrue(start >= 0);
780 if (bound == UNBOUND)
781 bound = fDocument.getLength();
783 Assert.isTrue(bound <= fDocument.getLength());
787 while (fPos < bound) {
789 fChar = fDocument.getChar(fPos);
792 if (fPos < fDocument.getLength() - 1) {
793 if (fDocument.get(fPos - 1, 2).equalsIgnoreCase("?>")) {
799 if (condition.stop(fChar, fPos, true))
804 } catch (BadLocationException e) {
810 * Finds the lowest position in <code>fDocument</code> such that the
811 * position is >= <code>position</code> and < <code>bound</code>
812 * and <code>fDocument.getChar(position) == ch</code> evaluates to
813 * <code>true</code> and the position is in the default partition.
816 * the first character position in <code>fDocument</code> to be
819 * the first position in <code>fDocument</code> to not consider
820 * any more, with <code>bound</code> > <code>position</code>,
821 * or <code>UNBOUND</code>
823 * the <code>char</code> to search for
824 * @return the lowest position of <code>ch</code> in [<code>position</code>,
825 * <code>bound</code>) that resides in a Java partition, or
826 * <code>NOT_FOUND</code> if none can be found
828 // public int scanForward(int position, int bound, char ch) {
829 // return scanForward(position, bound, new CharacterMatch(ch));
833 * Finds the lowest position in <code>fDocument</code> such that the
834 * position is >= <code>position</code> and < <code>bound</code>
835 * and <code>fDocument.getChar(position) == ch</code> evaluates to
836 * <code>true</code> for at least one ch in <code>chars</code> and the
837 * position is in the default partition.
840 * the first character position in <code>fDocument</code> to be
843 * the first position in <code>fDocument</code> to not consider
844 * any more, with <code>bound</code> > <code>position</code>,
845 * or <code>UNBOUND</code>
847 * an array of <code>char</code> to search for
848 * @return the lowest position of one element in <code>chars</code> in [<code>position</code>,
849 * <code>bound</code>) that resides in a Java partition, or
850 * <code>NOT_FOUND</code> if none can be found
852 // public int scanForward(int position, int bound, char[] chars) {
853 // return scanForward(position, bound, new CharacterMatch(chars));
857 * Finds the highest position <code>p</code> in <code>fDocument</code>
858 * such that <code>bound</code> < <code>p</code> <=
859 * <code>start</code> and
860 * <code>condition.stop(fDocument.getChar(p), p, false)</code> evaluates to
864 * the first character position in <code>fDocument</code> to be
867 * the first position in <code>fDocument</code> to not consider
868 * any more, with <code>bound</code> < <code>start</code>,
869 * or <code>UNBOUND</code>
871 * the <code>StopCondition</code> to check
872 * @return the highest position in (<code>bound</code>,
873 * <code>start</code>] for which <code>condition</code> holds, or
874 * <code>NOT_FOUND</code> if none can be found
876 public int scanBackward(int start, int bound, StopCondition condition) {
877 if (bound == UNBOUND)
880 Assert.isTrue(bound >= -1);
881 Assert.isTrue(start < fDocument.getLength());
885 while (fPos > bound) {
887 fChar = fDocument.getChar(fPos);
889 if (fChar == 'p' || fChar == 'P') {
891 if (fDocument.get(fPos - 4, 5).equalsIgnoreCase("<?php")) {
896 } else if (fChar == '?') {
898 if (fDocument.get(fPos - 1, 2).equalsIgnoreCase("<?")) {
904 if (condition.stop(fChar, fPos, false))
909 } catch (BadLocationException e) {
915 * Finds the highest position in <code>fDocument</code> such that the
916 * position is <= <code>position</code> and > <code>bound</code>
917 * and <code>fDocument.getChar(position) == ch</code> evaluates to
918 * <code>true</code> for at least one ch in <code>chars</code> and the
919 * position is in the default partition.
922 * the first character position in <code>fDocument</code> to be
925 * the first position in <code>fDocument</code> to not consider
926 * any more, with <code>bound</code> < <code>position</code>,
927 * or <code>UNBOUND</code>
929 * the <code>char</code> to search for
930 * @return the highest position of one element in <code>chars</code> in (<code>bound</code>,
931 * <code>position</code>] that resides in a Java partition, or
932 * <code>NOT_FOUND</code> if none can be found
934 // public int scanBackward(int position, int bound, char ch) {
935 // return scanBackward(position, bound, new CharacterMatch(ch));
939 * Finds the highest position in <code>fDocument</code> such that the
940 * position is <= <code>position</code> and > <code>bound</code>
941 * and <code>fDocument.getChar(position) == ch</code> evaluates to
942 * <code>true</code> for at least one ch in <code>chars</code> and the
943 * position is in the default partition.
946 * the first character position in <code>fDocument</code> to be
949 * the first position in <code>fDocument</code> to not consider
950 * any more, with <code>bound</code> < <code>position</code>,
951 * or <code>UNBOUND</code>
953 * an array of <code>char</code> to search for
954 * @return the highest position of one element in <code>chars</code> in (<code>bound</code>,
955 * <code>position</code>] that resides in a Java partition, or
956 * <code>NOT_FOUND</code> if none can be found
958 // public int scanBackward(int position, int bound, char[] chars) {
959 // return scanBackward(position, bound, new CharacterMatch(chars));
963 * Checks whether <code>position</code> resides in a default (Java)
964 * partition of <code>fDocument</code>.
967 * the position to be checked
968 * @return <code>true</code> if <code>position</code> is in the default
969 * partition of <code>fDocument</code>, <code>false</code>
972 public boolean isDefaultPartition(int position) {
973 Assert.isTrue(position >= 0);
974 Assert.isTrue(position <= fDocument.getLength());
977 ITypedRegion region = TextUtilities.getPartition(fDocument,
978 fPartitioning, position, false);
979 return region.getType().equals(fPartition);
981 } catch (BadLocationException e) {
988 * Checks if the line seems to be an open condition not followed by a block
989 * (i.e. an if, while, or for statement with just one following statement,
990 * see example below).
998 * Algorithm: if the last non-WS, non-Comment code on the line is an if
999 * (condition), while (condition), for( expression), do, else, and there is
1000 * no statement after that
1004 * the insert position of the new character
1006 * the lowest position to consider
1007 * @return <code>true</code> if the code is a conditional statement or
1008 * loop without a block, <code>false</code> otherwise
1010 public boolean isBracelessBlockStart(int position, int bound) {
1014 switch (previousToken(position, bound)) {
1019 position = findOpeningPeer(fPos, LPAREN, RPAREN);
1021 switch (previousToken(position - 1, bound)) {