--- /dev/null
+/*******************************************************************************
+ * Copyright (c) 2000, 2003 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Common Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/cpl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+
+package net.sourceforge.phpdt.internal.ui.text.spelling;
+
+import java.text.BreakIterator;
+import java.util.LinkedList;
+import java.util.Locale;
+
+import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
+import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
+
+import org.eclipse.jface.text.IDocument;
+import org.eclipse.jface.text.IRegion;
+import org.eclipse.jface.text.TextUtilities;
+
+/**
+ * Iterator to spell-check javadoc comment regions.
+ *
+ * @since 3.0
+ */
+public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {
+
+ /** The content of the region */
+ private final String fContent;
+
+ /** The line delimiter */
+ private final String fDelimiter;
+
+ /** The last token */
+ private String fLastToken= null;
+
+ /** The next break */
+ private int fNext= 1;
+
+ /** The offset of the region */
+ private final int fOffset;
+
+ /** The predecessor break */
+ private int fPredecessor;
+
+ /** The previous break */
+ private int fPrevious= 0;
+
+ /** The sentence breaks */
+ private final LinkedList fSentenceBreaks= new LinkedList();
+
+ /** Does the current word start a sentence? */
+ private boolean fStartsSentence= false;
+
+ /** The successor break */
+ private int fSuccessor;
+
+ /** The word iterator */
+ private final BreakIterator fWordIterator;
+
+ /**
+ * Creates a new spell check iterator.
+ *
+ * @param document
+ * The document containing the specified partition
+ * @param region
+ * The region to spell-check
+ * @param locale
+ * The locale to use for spell-checking
+ */
+ public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) {
+
+ fOffset= region.getOffset();
+ fWordIterator= BreakIterator.getWordInstance(locale);
+ fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
+
+ String content;
+ try {
+
+ content= document.get(region.getOffset(), region.getLength());
+ if (content.startsWith(NLSElement.TAG_PREFIX))
+ content= ""; //$NON-NLS-1$
+
+ } catch (Exception exception) {
+ content= ""; //$NON-NLS-1$
+ }
+ fContent= content;
+
+ fWordIterator.setText(content);
+ fPredecessor= fWordIterator.first();
+ fSuccessor= fWordIterator.next();
+
+ final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
+ iterator.setText(content);
+
+ int offset= iterator.current();
+ while (offset != BreakIterator.DONE) {
+
+ fSentenceBreaks.add(new Integer(offset));
+ offset= iterator.next();
+ }
+ }
+
+ /*
+ * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
+ */
+ public final int getBegin() {
+ return fPrevious + fOffset;
+ }
+
+ /*
+ * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
+ */
+ public final int getEnd() {
+ return fNext + fOffset - 1;
+ }
+
+ /*
+ * @see java.util.Iterator#hasNext()
+ */
+ public final boolean hasNext() {
+ return fSuccessor != BreakIterator.DONE;
+ }
+
+ /**
+ * Does the specified token consist of at least one letter and digits only?
+ *
+ * @param begin
+ * The begin index
+ * @param end
+ * The end index
+ * @return <code>true</code> iff the token consists of digits and at
+ * least one letter only, <code>false</code> otherwise
+ */
+ protected final boolean isAlphaNumeric(final int begin, final int end) {
+
+ char character= 0;
+
+ boolean letter= false;
+ for (int index= begin; index < end; index++) {
+
+ character= fContent.charAt(index);
+ if (Character.isLetter(character))
+ letter= true;
+
+ if (!Character.isLetterOrDigit(character))
+ return false;
+ }
+ return letter;
+ }
+
+ /**
+ * Was the last token a Javadoc tag tag?
+ *
+ * @param tags
+ * The javadoc tags to check
+ * @return <code>true</code> iff the last token was a Javadoc tag, <code>false</code>
+ * otherwise
+ */
+ protected final boolean isJavadocToken(final String[] tags) {
+
+ if (fLastToken != null) {
+
+ for (int index= 0; index < tags.length; index++) {
+
+ if (fLastToken.equals(tags[index]))
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Is the current token a single letter token surrounded by non-whitespace
+ * characters?
+ *
+ * @param begin
+ * The begin index
+ * @return <code>true</code> iff the token is a single letter token,
+ * <code>false</code> otherwise
+ */
+ protected final boolean isSingleLetter(final int begin) {
+
+ if (begin > 0 && begin < fContent.length() - 1)
+ return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1));
+
+ return false;
+ }
+
+ /**
+ * Does the specified token look like an URL?
+ *
+ * @param begin
+ * The begin index
+ * @return <code>true</code> iff this token look like an URL, <code>false</code>
+ * otherwise
+ */
+ protected final boolean isUrlToken(final int begin) {
+
+ for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
+
+ if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Does the specified token consist of whitespace only?
+ *
+ * @param begin
+ * The begin index
+ * @param end
+ * The end index
+ * @return <code>true</code> iff the token consists of whitespace only,
+ * <code>false</code> otherwise
+ */
+ protected final boolean isWhitespace(final int begin, final int end) {
+
+ for (int index= begin; index < end; index++) {
+
+ if (!Character.isWhitespace(fContent.charAt(index)))
+ return false;
+ }
+ return true;
+ }
+
+ /*
+ * @see java.util.Iterator#next()
+ */
+ public final Object next() {
+
+ String token= nextToken();
+ while (token == null && fSuccessor != BreakIterator.DONE)
+ token= nextToken();
+
+ fLastToken= token;
+
+ return token;
+ }
+
+ /**
+ * Advances the end index to the next word break.
+ */
+ protected final void nextBreak() {
+
+ fNext= fSuccessor;
+ fPredecessor= fSuccessor;
+
+ fSuccessor= fWordIterator.next();
+ }
+
+ /**
+ * Returns the next sentence break.
+ *
+ * @return The next sentence break
+ */
+ protected final int nextSentence() {
+ return ((Integer) fSentenceBreaks.getFirst()).intValue();
+ }
+
+ /**
+ * Determines the next token to be spell-checked.
+ *
+ * @return The next token to be spell-checked, or <code>null</code> iff
+ * the next token is not a candidate for spell-checking.
+ */
+ protected String nextToken() {
+
+ String token= null;
+
+ fPrevious= fPredecessor;
+ fStartsSentence= false;
+
+ nextBreak();
+
+ boolean update= false;
+ if (fNext - fPrevious > 0) {
+
+ if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
+
+ nextBreak();
+ if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
+ update= true;
+ token= fContent.substring(fPrevious, fNext);
+ } else
+ fPredecessor= fNext;
+
+ } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {
+
+ if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
+ nextBreak();
+
+ nextBreak();
+
+ if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
+
+ nextBreak();
+ if (fSuccessor != BreakIterator.DONE) {
+ update= true;
+ token= fContent.substring(fPrevious, fNext);
+ }
+ }
+ } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {
+
+ if (isUrlToken(fPrevious))
+ skipTokens(fPrevious, ' ');
+ else if (isJavadocToken(JAVADOC_PARAM_TAGS))
+ fLastToken= null;
+ else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
+ fLastToken= null;
+ skipTokens(fPrevious, fDelimiter.charAt(0));
+ } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
+ token= fContent.substring(fPrevious, fNext);
+
+ update= true;
+ }
+ }
+
+ if (update && fSentenceBreaks.size() > 0) {
+
+ if (fPrevious >= nextSentence()) {
+
+ while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
+ fSentenceBreaks.removeFirst();
+
+ fStartsSentence= (fLastToken == null) || (token != null);
+ }
+ }
+ return token;
+ }
+
+ /*
+ * @see java.util.Iterator#remove()
+ */
+ public final void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Skip the tokens until the stop character is reached.
+ *
+ * @param begin
+ * The begin index
+ * @param stop
+ * The stop character
+ */
+ protected final void skipTokens(final int begin, final char stop) {
+
+ int end= begin;
+
+ while (end < fContent.length() && fContent.charAt(end) != stop)
+ end++;
+
+ if (end < fContent.length()) {
+
+ fNext= end;
+ fPredecessor= fNext;
+
+ fSuccessor= fWordIterator.following(fNext);
+ } else
+ fSuccessor= BreakIterator.DONE;
+ }
+
+ /*
+ * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
+ */
+ public final boolean startsSentence() {
+ return fStartsSentence;
+ }
+}