RC2 compatibility

[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / spelling / SpellCheckIterator.java
diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java

new file mode 100644 (file)

index 0000000..f420cd7
--- /dev/null
+++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java
@@ -0,0 +1,378 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2003 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials 
+ * are made available under the terms of the Common Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/cpl-v10.html
+ * 
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+
+package net.sourceforge.phpdt.internal.ui.text.spelling;
+
+import java.text.BreakIterator;
+import java.util.LinkedList;
+import java.util.Locale;
+
+import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
+
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.text.IRegion;
+import org.eclipse.jface.text.TextUtilities;
+
+/**
+ * Iterator to spell-check javadoc comment regions.
+ * <p>
+ * The text of the region is split with a locale-specific word
+ * <code>BreakIterator</code>. {@link #next()} returns the tokens selected by
+ * {@link #nextToken()} as <code>String</code>s and may return
+ * <code>null</code> when the remaining content holds no further candidate.
+ * After a call to {@link #next()}, {@link #getBegin()}, {@link #getEnd()} and
+ * {@link #startsSentence()} describe the most recently scanned token.
+ * {@link #remove()} is not supported.
+ * <p>
+ * The tag prefix/postfix constants used by {@link #nextToken()} are inherited
+ * from <code>IJavaDocTagConstants</code> and <code>IHtmlTagConstants</code>.
+ * 
+ * @since 3.0
+ */
+public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {
+
+       /** The content of the region; empty if the region could not be read or starts with NLSElement.TAG_PREFIX */
+       private final String fContent;
+
+       /** The line delimiter, as returned by TextUtilities.getDefaultLineDelimiter() for the document */
+       private final String fDelimiter;
+
+       /** The last token returned by next(); reset to null by nextToken() after a parameter or reference tag */
+       private String fLastToken= null;
+
+       /** The next break: exclusive end index of the current token, relative to fContent */
+       private int fNext= 1;
+
+       /** The offset of the region within the document */
+       private final int fOffset;
+
+       /** The predecessor break: index in fContent at which the next token scan starts */
+       private int fPredecessor;
+
+       /** The previous break: start index of the current token, relative to fContent */
+       private int fPrevious= 0;
+
+       /** The pending sentence breaks: Integer indices into fContent, removed from the front once a token has passed them */
+       private final LinkedList fSentenceBreaks= new LinkedList();
+
+       /** Does the current word start a sentence? */
+       private boolean fStartsSentence= false;
+
+       /** The successor break: look-ahead word break, BreakIterator.DONE once the content is exhausted */
+       private int fSuccessor;
+
+       /** The word iterator */
+       private final BreakIterator fWordIterator;
+
+       /**
+        * Creates a new spell check iterator.
+        * <p>
+        * Reads the text of the region from the document, positions the word
+        * iterator on the first word and records every sentence break of the
+        * text. If the text cannot be read, or if it starts with
+        * <code>NLSElement.TAG_PREFIX</code>, the iterator works on an empty
+        * string and therefore yields no tokens.
+        * 
+        * @param document
+        *                   The document containing the specified partition
+        * @param region
+        *                   The region to spell-check
+        * @param locale
+        *                   The locale to use for spell-checking
+        */
+       public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) {
+
+               fOffset= region.getOffset();
+               fWordIterator= BreakIterator.getWordInstance(locale);
+               fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
+
+               String content;
+               try {
+
+                       content= document.get(region.getOffset(), region.getLength());
+                       if (content.startsWith(NLSElement.TAG_PREFIX)) // regions starting with the NLS tag prefix are not checked
+                               content= ""; //$NON-NLS-1$
+
+               } catch (Exception exception) { // best effort: a region that cannot be read is treated as empty
+                       content= ""; //$NON-NLS-1$
+               }
+               fContent= content;
+
+               fWordIterator.setText(content);
+               fPredecessor= fWordIterator.first();
+               fSuccessor= fWordIterator.next(); // look-ahead; DONE right away for empty content
+
+               final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
+               iterator.setText(content);
+
+               int offset= iterator.current(); // begin with the sentence iterator's current boundary
+               while (offset != BreakIterator.DONE) {
+
+                       fSentenceBreaks.add(new Integer(offset));
+                       offset= iterator.next();
+               }
+       }
+
+       /*
+        * Returns the document offset of the first character of the most
+        * recently scanned token (fPrevious shifted by the region offset).
+        * 
+        * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
+        */
+       public final int getBegin() {
+               return fPrevious + fOffset;
+       }
+
+       /*
+        * Returns the document offset of the last character of the most
+        * recently scanned token. fNext is an exclusive end index, hence the
+        * subtraction of one to obtain an inclusive offset.
+        * 
+        * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
+        */
+       public final int getEnd() {
+               return fNext + fOffset - 1;
+       }
+
+       /*
+        * Returns true as long as the word iterator has not run out of breaks.
+        * This only tells that unscanned content remains: next() may still
+        * return null if none of that content is a candidate token.
+        * 
+        * @see java.util.Iterator#hasNext()
+        */
+       public final boolean hasNext() {
+               return fSuccessor != BreakIterator.DONE;
+       }
+
+       /**
+        * Does the specified token consist of at least one letter and digits only?
+        * <p>
+        * The range <code>[begin, end)</code> of the content is examined. An
+        * empty range yields <code>false</code> since it contains no letter.
+        * 
+        * @param begin
+        *                   The begin index (inclusive)
+        * @param end
+        *                   The end index (exclusive)
+        * @return <code>true</code> iff the token consists of digits and at
+        *               least one letter only, <code>false</code> otherwise
+        */
+       protected final boolean isAlphaNumeric(final int begin, final int end) {
+
+               char character= 0;
+
+               boolean letter= false;
+               for (int index= begin; index < end; index++) {
+
+                       character= fContent.charAt(index);
+                       if (Character.isLetter(character))
+                               letter= true;
+
+                       if (!Character.isLetterOrDigit(character))
+                               return false;
+               }
+               return letter;
+       }
+
+       /**
+        * Was the last token a Javadoc tag?
+        * <p>
+        * The last token returned by {@link #next()} is compared with
+        * <code>equals</code> against each of the specified tags. If there is no
+        * last token, the result is <code>false</code>.
+        * 
+        * @param tags
+        *                   The javadoc tags to check
+        * @return <code>true</code> iff the last token was a Javadoc tag, <code>false</code>
+        *               otherwise
+        */
+       protected final boolean isJavadocToken(final String[] tags) {
+
+               if (fLastToken != null) {
+
+                       for (int index= 0; index < tags.length; index++) {
+
+                               if (fLastToken.equals(tags[index]))
+                                       return true;
+                       }
+               }
+               return false;
+       }
+
+       /**
+        * Is the current token a single letter token surrounded by whitespace
+        * characters?
+        * <p>
+        * The characters directly before and after <code>begin</code> must both
+        * be whitespace. A letter at the very first or very last position of the
+        * content has no such neighbour and is therefore rejected.
+        * 
+        * @param begin
+        *                   The begin index
+        * @return <code>true</code> iff the token is a single letter token,
+        *               <code>false</code> otherwise
+        */
+       protected final boolean isSingleLetter(final int begin) {
+
+               if (begin > 0 && begin < fContent.length() - 1)
+                       return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1));
+
+               return false;
+       }
+
+       /**
+        * Does the specified token look like a URL?
+        * <p>
+        * This is the case if the content at <code>begin</code> starts with one
+        * of the prefixes in <code>DefaultSpellChecker.URL_PREFIXES</code>.
+        * 
+        * @param begin
+        *                   The begin index
+        * @return <code>true</code> iff this token looks like a URL, <code>false</code>
+        *               otherwise
+        */
+       protected final boolean isUrlToken(final int begin) {
+
+               for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
+
+                       if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
+                               return true;
+               }
+               return false;
+       }
+
+       /**
+        * Does the specified token consist of whitespace only?
+        * <p>
+        * The range <code>[begin, end)</code> of the content is examined. An
+        * empty range yields <code>true</code>.
+        * 
+        * @param begin
+        *                   The begin index (inclusive)
+        * @param end
+        *                   The end index (exclusive)
+        * @return <code>true</code> iff the token consists of whitespace only,
+        *               <code>false</code> otherwise
+        */
+       protected final boolean isWhitespace(final int begin, final int end) {
+
+               for (int index= begin; index < end; index++) {
+
+                       if (!Character.isWhitespace(fContent.charAt(index)))
+                               return false;
+               }
+               return true;
+       }
+
+       /*
+        * Returns the next token to spell-check as a String. Tokens for which
+        * nextToken() answers null are skipped. If the content is exhausted
+        * before a candidate is found, null is returned. The result is also
+        * remembered as the last token.
+        * 
+        * @see java.util.Iterator#next()
+        */
+       public final Object next() {
+
+               String token= nextToken();
+               while (token == null && fSuccessor != BreakIterator.DONE)
+                       token= nextToken();
+
+               fLastToken= token;
+
+               return token;
+       }
+
+       /**
+        * Advances the end index to the next word break.
+        * <p>
+        * Both the end index and the predecessor break are set to the current
+        * look-ahead break, and a new look-ahead break is fetched from the word
+        * iterator.
+        */
+       protected final void nextBreak() {
+
+               fNext= fSuccessor;
+               fPredecessor= fSuccessor;
+
+               fSuccessor= fWordIterator.next();
+       }
+
+       /**
+        * Returns the next sentence break.
+        * <p>
+        * The list of pending sentence breaks must not be empty:
+        * <code>LinkedList.getFirst()</code> throws a
+        * <code>NoSuchElementException</code> otherwise. The callers in this
+        * class check the size of the list first.
+        * 
+        * @return The next sentence break
+        */
+       protected final int nextSentence() {
+               return ((Integer) fSentenceBreaks.getFirst()).intValue();
+       }
+
+       /**
+        * Determines the next token to be spell-checked.
+        * <p>
+        * The scan starts at the predecessor break and distinguishes three
+        * kinds of non-empty tokens:
+        * <ul>
+        * <li>The token starts with <code>JAVADOC_TAG_PREFIX</code> and more
+        * content follows: the token is extended by one word break and returned
+        * if the character after the prefix is a letter.</li>
+        * <li>The token starts with <code>HTML_TAG_PREFIX</code> and is followed
+        * by a letter or a slash: the token is extended over the tag name and
+        * returned if <code>HTML_TAG_POSTFIX</code> follows and more content
+        * remains after it.</li>
+        * <li>The token consists of letters and digits with at least one letter:
+        * URLs are skipped up to the next space; a word following one of the
+        * <code>JAVADOC_PARAM_TAGS</code> is suppressed; after one of the
+        * <code>JAVADOC_REFERENCE_TAGS</code> everything up to the first
+        * character of the line delimiter is skipped. Any other word is returned
+        * if it is longer than one character or a single letter surrounded by
+        * whitespace.</li>
+        * </ul>
+        * Whenever one of these cases was handled, the pending sentence breaks
+        * are consumed up to the start of the token and the starts-sentence flag
+        * is updated.
+        * <p>
+        * NOTE(review): the prefix/postfix constants are declared outside this
+        * file; presumably '@', '&lt;', '&gt;' and "&lt;/" -- confirm against
+        * IJavaDocTagConstants and IHtmlTagConstants.
+        * 
+        * @return The next token to be spell-checked, or <code>null</code> iff
+        *               the next token is not a candidate for spell-checking.
+        */
+       protected String nextToken() {
+
+               String token= null;
+
+               fPrevious= fPredecessor; // the new token starts where the previous scan stopped
+               fStartsSentence= false;
+
+               nextBreak(); // fNext is now the first word break after fPrevious
+
+               boolean update= false; // set when sentence bookkeeping has to be done for this token
+               if (fNext - fPrevious > 0) {
+
+                       if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) { // Javadoc tag candidate
+
+                               nextBreak(); // extend the token over the word following the prefix
+                               if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
+                                       update= true;
+                                       token= fContent.substring(fPrevious, fNext);
+                               } else
+                                       fPredecessor= fNext; // resume behind the skipped range (nextBreak() already left both equal)
+
+                       } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) { // HTML tag candidate
+
+                               if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
+                                       nextBreak(); // content starts with HTML_CLOSE_PREFIX here: consume one extra break
+
+                               nextBreak();
+
+                               if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
+
+                                       nextBreak(); // include the postfix character in the token
+                                       if (fSuccessor != BreakIterator.DONE) {
+                                               update= true;
+                                               token= fContent.substring(fPrevious, fNext);
+                                       }
+                               }
+                       } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) { // plain word
+
+                               if (isUrlToken(fPrevious))
+                                       skipTokens(fPrevious, ' '); // ignore the URL up to the next space
+                               else if (isJavadocToken(JAVADOC_PARAM_TAGS))
+                                       fLastToken= null; // the word following a parameter tag is not returned
+                               else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
+                                       fLastToken= null;
+                                       skipTokens(fPrevious, fDelimiter.charAt(0)); // ignore everything up to the first delimiter character
+                               } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
+                                       token= fContent.substring(fPrevious, fNext);
+
+                               update= true;
+                       }
+               }
+
+               if (update && fSentenceBreaks.size() > 0) {
+
+                       if (fPrevious >= nextSentence()) {
+
+                               while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
+                                       fSentenceBreaks.removeFirst(); // drop every break at or before the start of this token
+
+                               fStartsSentence= (fLastToken == null) || (token != null);
+                       }
+               }
+               return token;
+       }
+
+       /*
+        * Not supported: always throws an UnsupportedOperationException.
+        * 
+        * @see java.util.Iterator#remove()
+        */
+       public final void remove() {
+               throw new UnsupportedOperationException();
+       }
+
+       /**
+        * Skip the tokens until the stop character is reached.
+        * <p>
+        * The content is searched from <code>begin</code> for the first
+        * occurrence of the stop character. If it is found, the end index and
+        * the predecessor break are moved to its position and the look-ahead
+        * break becomes the first word break following it. Otherwise the
+        * look-ahead break is set to <code>BreakIterator.DONE</code>, which ends
+        * the iteration.
+        * 
+        * @param begin
+        *                   The begin index
+        * @param stop
+        *                   The stop character
+        */
+       protected final void skipTokens(final int begin, final char stop) {
+
+               int end= begin;
+
+               while (end < fContent.length() && fContent.charAt(end) != stop)
+                       end++;
+
+               if (end < fContent.length()) {
+
+                       fNext= end;
+                       fPredecessor= fNext;
+
+                       fSuccessor= fWordIterator.following(fNext);
+               } else
+                       fSuccessor= BreakIterator.DONE; // stop character not found: nothing left to iterate
+       }
+
+       /*
+        * Returns the flag computed by the most recent call to nextToken().
+        * 
+        * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
+        */
+       public final boolean startsSentence() {
+               return fStartsSentence;
+       }
+}