/*******************************************************************************
* Copyright (c) 2000, 2003 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/cpl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package net.sourceforge.phpdt.internal.ui.text.spelling;
import java.text.BreakIterator;
import java.util.LinkedList;
import java.util.Locale;
import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IRegion;
import org.eclipse.jface.text.TextUtilities;
/**
 * Iterator to spell-check javadoc comment regions.
 * <p>
 * The content of the region is split at the word boundaries reported by a
 * locale-specific {@link BreakIterator}. Each call to {@link #next()}
 * returns the next token that is a candidate for spell-checking, or
 * <code>null</code> if the content is exhausted before such a token is
 * found. Tokens starting with the Javadoc tag prefix and simple HTML tags
 * are returned as a single token; URLs, the word following a Javadoc
 * parameter tag and the rest of the line following a Javadoc reference tag
 * are skipped.
 *
 * @since 3.0
 */
public class SpellCheckIterator implements ISpellCheckIterator,
        IJavaDocTagConstants, IHtmlTagConstants {

    /** The content of the region */
    private final String fContent;

    /** The line delimiter */
    private final String fDelimiter;

    /** The last token returned by {@link #next()}, possibly <code>null</code> */
    private String fLastToken = null;

    /** The next break (exclusive end index of the current token) */
    private int fNext = 1;

    /** The offset of the region within the document */
    private final int fOffset;

    /** The predecessor break (where the next token to examine begins) */
    private int fPredecessor;

    /** The previous break (begin index of the current token) */
    private int fPrevious = 0;

    /** The sentence breaks, stored as <code>Integer</code> in ascending order */
    private final LinkedList fSentenceBreaks = new LinkedList();

    /** Does the current word start a sentence? */
    private boolean fStartsSentence = false;

    /**
     * The successor break (the word break following {@link #fNext}), or
     * {@link BreakIterator#DONE} if the content is exhausted
     */
    private int fSuccessor;

    /** The word iterator */
    private final BreakIterator fWordIterator;

    /**
     * Creates a new spell check iterator.
     * <p>
     * If the content of the region cannot be read, or if it starts with
     * {@link NLSElement#TAG_PREFIX}, the iterator behaves as if the region
     * were empty.
     *
     * @param document
     *            The document containing the specified partition
     * @param region
     *            The region to spell-check
     * @param locale
     *            The locale to use for spell-checking
     */
    public SpellCheckIterator(final IDocument document, final IRegion region,
            final Locale locale) {
        fOffset = region.getOffset();
        fWordIterator = BreakIterator.getWordInstance(locale);
        fDelimiter = TextUtilities.getDefaultLineDelimiter(document);

        String content;
        try {
            content = document.get(region.getOffset(), region.getLength());
            // Regions starting with the NLS tag prefix are not spell-checked.
            if (content.startsWith(NLSElement.TAG_PREFIX)) {
                content = ""; //$NON-NLS-1$
            }
        } catch (Exception exception) {
            // Best effort: a region that cannot be read is treated as empty.
            content = ""; //$NON-NLS-1$
        }
        fContent = content;

        fWordIterator.setText(content);
        fPredecessor = fWordIterator.first();
        fSuccessor = fWordIterator.next();

        // Record all sentence boundaries up front; nextToken() consumes them
        // from the head of the list as the word iteration passes them.
        final BreakIterator iterator = BreakIterator
                .getSentenceInstance(locale);
        iterator.setText(content);
        int offset = iterator.current();
        while (offset != BreakIterator.DONE) {
            fSentenceBreaks.add(new Integer(offset));
            offset = iterator.next();
        }
    }

    /**
     * Returns the begin index of the current token, relative to the
     * document (the region offset is added).
     *
     * @return The begin index of the current token
     * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getBegin()
     */
    public final int getBegin() {
        return fPrevious + fOffset;
    }

    /**
     * Returns the index of the last character of the current token,
     * relative to the document (the region offset is added). The index is
     * inclusive, hence one less than the break following the token.
     *
     * @return The inclusive end index of the current token
     * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#getEnd()
     */
    public final int getEnd() {
        return fNext + fOffset - 1;
    }

    /**
     * Is there another word break left to advance to?
     *
     * @return <code>true</code> iff the word iteration is not yet
     *         exhausted, <code>false</code> otherwise
     * @see java.util.Iterator#hasNext()
     */
    public final boolean hasNext() {
        return fSuccessor != BreakIterator.DONE;
    }

    /**
     * Does the specified token consist of at least one letter and digits only?
     *
     * @param begin
     *            The begin index (inclusive)
     * @param end
     *            The end index (exclusive)
     * @return <code>true</code> iff the token consists of letters and
     *         digits only and contains at least one letter,
     *         <code>false</code> otherwise
     */
    protected final boolean isAlphaNumeric(final int begin, final int end) {
        char character = 0;
        boolean letter = false;
        for (int index = begin; index < end; index++) {
            character = fContent.charAt(index);
            if (Character.isLetter(character)) {
                letter = true;
            }
            if (!Character.isLetterOrDigit(character)) {
                return false;
            }
        }
        return letter;
    }

    /**
     * Was the last token a Javadoc tag?
     *
     * @param tags
     *            The javadoc tags to check
     * @return <code>true</code> iff the last token returned by
     *         {@link #next()} equals one of the specified tags,
     *         <code>false</code> otherwise
     */
    protected final boolean isJavadocToken(final String[] tags) {
        if (fLastToken != null) {
            for (int index = 0; index < tags.length; index++) {
                if (fLastToken.equals(tags[index])) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Is the character at the specified index a single letter surrounded by
     * whitespace characters?
     * <p>
     * A letter at the very first or very last index of the content has no
     * neighbor on one side and is therefore never reported as a single
     * letter token.
     *
     * @param begin
     *            The begin index
     * @return <code>true</code> iff the character at the begin index is a
     *         letter with a whitespace character directly before and
     *         after it, <code>false</code> otherwise
     */
    protected final boolean isSingleLetter(final int begin) {
        if (begin > 0 && begin < fContent.length() - 1) {
            return Character.isWhitespace(fContent.charAt(begin - 1))
                    && Character.isLetter(fContent.charAt(begin))
                    && Character.isWhitespace(fContent.charAt(begin + 1));
        }
        return false;
    }

    /**
     * Does the specified token look like a URL?
     *
     * @param begin
     *            The begin index
     * @return <code>true</code> iff the content at the begin index starts
     *         with one of {@link DefaultSpellChecker#URL_PREFIXES},
     *         <code>false</code> otherwise
     */
    protected final boolean isUrlToken(final int begin) {
        for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
            if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index],
                    begin)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Does the specified token consist of whitespace only?
     *
     * @param begin
     *            The begin index (inclusive)
     * @param end
     *            The end index (exclusive)
     * @return <code>true</code> iff the token consists of whitespace only
     *         (which includes the empty range), <code>false</code>
     *         otherwise
     */
    protected final boolean isWhitespace(final int begin, final int end) {
        for (int index = begin; index < end; index++) {
            if (!Character.isWhitespace(fContent.charAt(index))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the next token to be spell-checked and remembers it as the
     * last token. Tokens that are no candidates for spell-checking are
     * skipped.
     *
     * @return The next token as a <code>String</code>, or
     *         <code>null</code> if the content was exhausted before a
     *         candidate was found
     * @see java.util.Iterator#next()
     */
    public final Object next() {
        String token = nextToken();
        while (token == null && fSuccessor != BreakIterator.DONE) {
            token = nextToken();
        }
        fLastToken = token;
        return token;
    }

    /**
     * Advances the end index to the next word break.
     * <p>
     * If the word iteration is already exhausted, the end index and the
     * predecessor break become {@link BreakIterator#DONE}; callers that
     * rely on these values should check {@link #hasNext()} first.
     */
    protected final void nextBreak() {
        fNext = fSuccessor;
        fPredecessor = fSuccessor;
        fSuccessor = fWordIterator.next();
    }

    /**
     * Returns the next sentence break.
     * <p>
     * Must only be called while there are sentence breaks left; otherwise
     * the underlying list throws a
     * <code>java.util.NoSuchElementException</code>.
     *
     * @return The next sentence break
     */
    protected final int nextSentence() {
        return ((Integer) fSentenceBreaks.getFirst()).intValue();
    }

    /**
     * Determines the next token to be spell-checked.
     * <p>
     * Advances the begin and end indices of the current token, updates the
     * sentence start flag and consumes the sentence breaks that have been
     * passed.
     *
     * @return The next token to be spell-checked, or <code>null</code>
     *         iff the next token is not a candidate for spell-checking.
     */
    protected String nextToken() {
        String token = null;
        fPrevious = fPredecessor;
        fStartsSentence = false;
        nextBreak();

        boolean update = false;
        if (fNext - fPrevious > 0) {
            if (fSuccessor != BreakIterator.DONE
                    && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
                // Tag prefix: join the prefix and the following word into
                // one token, provided a letter follows the prefix.
                nextBreak();
                if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
                    update = true;
                    token = fContent.substring(fPrevious, fNext);
                } else {
                    fPredecessor = fNext;
                }
            } else if (fSuccessor != BreakIterator.DONE
                    && fContent.charAt(fPrevious) == HTML_TAG_PREFIX
                    && (Character.isLetter(fContent.charAt(fNext)) || fContent
                            .charAt(fNext) == '/')) {
                // HTML tag: a closing tag has one more break to skip (the
                // part of the close prefix after the tag prefix).
                if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious)) {
                    nextBreak();
                }
                // Advance over the tag name only while a further break
                // exists. Advancing an exhausted iteration would set the
                // end index to BreakIterator.DONE and make getEnd() report
                // an index in front of the region.
                if (fSuccessor != BreakIterator.DONE) {
                    nextBreak();
                }
                if (fSuccessor != BreakIterator.DONE
                        && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
                    nextBreak();
                    if (fSuccessor != BreakIterator.DONE) {
                        update = true;
                        token = fContent.substring(fPrevious, fNext);
                    }
                }
            } else if (!isWhitespace(fPrevious, fNext)
                    && isAlphaNumeric(fPrevious, fNext)) {
                if (isUrlToken(fPrevious)) {
                    // URLs are not spell-checked: skip to the next space.
                    skipTokens(fPrevious, ' ');
                } else if (isJavadocToken(JAVADOC_PARAM_TAGS)) {
                    // The word following a parameter tag is not checked.
                    fLastToken = null;
                } else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
                    // The rest of the line following a reference tag is
                    // not checked.
                    fLastToken = null;
                    skipTokens(fPrevious, fDelimiter.charAt(0));
                } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious)) {
                    token = fContent.substring(fPrevious, fNext);
                }
                // Sentence bookkeeping applies to every alphanumeric word,
                // whether or not it is returned as a token.
                update = true;
            }
        }

        if (update && fSentenceBreaks.size() > 0) {
            if (fPrevious >= nextSentence()) {
                // Drop all sentence breaks the current token has passed.
                while (fSentenceBreaks.size() > 0
                        && fPrevious >= nextSentence()) {
                    fSentenceBreaks.removeFirst();
                }
                fStartsSentence = (fLastToken == null) || (token != null);
            }
        }
        return token;
    }

    /**
     * Not supported by this iterator.
     *
     * @throws UnsupportedOperationException
     *             always
     * @see java.util.Iterator#remove()
     */
    public final void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Skip the tokens until the stop character is reached.
     * <p>
     * If the stop character is found, the iteration continues at its
     * index; otherwise the iteration is marked as exhausted.
     *
     * @param begin
     *            The begin index
     * @param stop
     *            The stop character
     */
    protected final void skipTokens(final int begin, final char stop) {
        int end = begin;
        while (end < fContent.length() && fContent.charAt(end) != stop) {
            end++;
        }
        if (end < fContent.length()) {
            fNext = end;
            fPredecessor = fNext;
            fSuccessor = fWordIterator.following(fNext);
        } else {
            fSuccessor = BreakIterator.DONE;
        }
    }

    /**
     * Does the current token start a sentence?
     *
     * @return <code>true</code> iff the token determined last starts a
     *         sentence, <code>false</code> otherwise
     * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator#startsSentence()
     */
    public final boolean startsSentence() {
        return fStartsSentence;
    }
}