1 /*******************************************************************************
2 * Copyright (c) 2000, 2003 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
12 package net.sourceforge.phpdt.internal.ui.text.spelling;
14 import java.text.BreakIterator;
15 import java.util.LinkedList;
16 import java.util.Locale;
18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
24 import org.eclipse.jface.text.IDocument;
25 import org.eclipse.jface.text.IRegion;
26 import org.eclipse.jface.text.TextUtilities;
29 * Iterator to spell-check javadoc comment regions.
33 public class SpellCheckIterator implements ISpellCheckIterator,
34 IJavaDocTagConstants, IHtmlTagConstants {
36 /** The content of the region */
37 private final String fContent;
39 /** The line delimiter */
40 private final String fDelimiter;
43 private String fLastToken = null;
46 private int fNext = 1;
48 /** The offset of the region */
49 private final int fOffset;
51 /** The predecessor break */
52 private int fPredecessor;
54 /** The previous break */
55 private int fPrevious = 0;
57 /** The sentence breaks */
58 private final LinkedList fSentenceBreaks = new LinkedList();
60 /** Does the current word start a sentence? */
61 private boolean fStartsSentence = false;
63 /** The successor break */
64 private int fSuccessor;
66 /** The word iterator */
67 private final BreakIterator fWordIterator;
70 * Creates a new spell check iterator.
73 * The document containing the specified partition
75 * The region to spell-check
77 * The locale to use for spell-checking
79 public SpellCheckIterator(final IDocument document, final IRegion region,
80 final Locale locale) {
82 fOffset = region.getOffset();
83 fWordIterator = BreakIterator.getWordInstance(locale);
84 fDelimiter = TextUtilities.getDefaultLineDelimiter(document);
89 content = document.get(region.getOffset(), region.getLength());
90 if (content.startsWith(NLSElement.TAG_PREFIX))
91 content = ""; //$NON-NLS-1$
93 } catch (Exception exception) {
94 content = ""; //$NON-NLS-1$
98 fWordIterator.setText(content);
99 fPredecessor = fWordIterator.first();
100 fSuccessor = fWordIterator.next();
102 final BreakIterator iterator = BreakIterator
103 .getSentenceInstance(locale);
104 iterator.setText(content);
106 int offset = iterator.current();
107 while (offset != BreakIterator.DONE) {
109 fSentenceBreaks.add(new Integer(offset));
110 offset = iterator.next();
115 * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
117 public final int getBegin() {
118 return fPrevious + fOffset;
122 * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
124 public final int getEnd() {
125 return fNext + fOffset - 1;
129 * @see java.util.Iterator#hasNext()
131 public final boolean hasNext() {
132 return fSuccessor != BreakIterator.DONE;
136 * Does the specified token consist of at least one letter and digits only?
142 * @return <code>true</code> iff the token consists of digits and at least
143 * one letter only, <code>false</code> otherwise
145 protected final boolean isAlphaNumeric(final int begin, final int end) {
149 boolean letter = false;
150 for (int index = begin; index < end; index++) {
152 character = fContent.charAt(index);
153 if (Character.isLetter(character))
156 if (!Character.isLetterOrDigit(character))
163 * Was the last token a Javadoc tag tag?
166 * The javadoc tags to check
167 * @return <code>true</code> iff the last token was a Javadoc tag,
168 * <code>false</code> otherwise
170 protected final boolean isJavadocToken(final String[] tags) {
172 if (fLastToken != null) {
174 for (int index = 0; index < tags.length; index++) {
176 if (fLastToken.equals(tags[index]))
184 * Is the current token a single letter token surrounded by non-whitespace
189 * @return <code>true</code> iff the token is a single letter token,
190 * <code>false</code> otherwise
192 protected final boolean isSingleLetter(final int begin) {
194 if (begin > 0 && begin < fContent.length() - 1)
195 return Character.isWhitespace(fContent.charAt(begin - 1))
196 && Character.isLetter(fContent.charAt(begin))
197 && Character.isWhitespace(fContent.charAt(begin + 1));
203 * Does the specified token look like an URL?
207 * @return <code>true</code> iff this token look like an URL,
208 * <code>false</code> otherwise
210 protected final boolean isUrlToken(final int begin) {
212 for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
214 if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index],
222 * Does the specified token consist of whitespace only?
228 * @return <code>true</code> iff the token consists of whitespace only,
229 * <code>false</code> otherwise
231 protected final boolean isWhitespace(final int begin, final int end) {
233 for (int index = begin; index < end; index++) {
235 if (!Character.isWhitespace(fContent.charAt(index)))
242 * @see java.util.Iterator#next()
244 public final Object next() {
246 String token = nextToken();
247 while (token == null && fSuccessor != BreakIterator.DONE)
256 * Advances the end index to the next word break.
258 protected final void nextBreak() {
261 fPredecessor = fSuccessor;
263 fSuccessor = fWordIterator.next();
267 * Returns the next sentence break.
269 * @return The next sentence break
271 protected final int nextSentence() {
272 return ((Integer) fSentenceBreaks.getFirst()).intValue();
276 * Determines the next token to be spell-checked.
278 * @return The next token to be spell-checked, or <code>null</code> iff
279 * the next token is not a candidate for spell-checking.
281 protected String nextToken() {
285 fPrevious = fPredecessor;
286 fStartsSentence = false;
290 boolean update = false;
291 if (fNext - fPrevious > 0) {
293 if (fSuccessor != BreakIterator.DONE
294 && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
297 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
299 token = fContent.substring(fPrevious, fNext);
301 fPredecessor = fNext;
303 } else if (fSuccessor != BreakIterator.DONE
304 && fContent.charAt(fPrevious) == HTML_TAG_PREFIX
305 && (Character.isLetter(fContent.charAt(fNext)) || fContent
306 .charAt(fNext) == '/')) {
308 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
313 if (fSuccessor != BreakIterator.DONE
314 && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
317 if (fSuccessor != BreakIterator.DONE) {
319 token = fContent.substring(fPrevious, fNext);
322 } else if (!isWhitespace(fPrevious, fNext)
323 && isAlphaNumeric(fPrevious, fNext)) {
325 if (isUrlToken(fPrevious))
326 skipTokens(fPrevious, ' ');
327 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
329 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
331 skipTokens(fPrevious, fDelimiter.charAt(0));
332 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
333 token = fContent.substring(fPrevious, fNext);
339 if (update && fSentenceBreaks.size() > 0) {
341 if (fPrevious >= nextSentence()) {
343 while (fSentenceBreaks.size() > 0
344 && fPrevious >= nextSentence())
345 fSentenceBreaks.removeFirst();
347 fStartsSentence = (fLastToken == null) || (token != null);
354 * @see java.util.Iterator#remove()
356 public final void remove() {
357 throw new UnsupportedOperationException();
361 * Skip the tokens until the stop character is reached.
368 protected final void skipTokens(final int begin, final char stop) {
372 while (end < fContent.length() && fContent.charAt(end) != stop)
375 if (end < fContent.length()) {
378 fPredecessor = fNext;
380 fSuccessor = fWordIterator.following(fNext);
382 fSuccessor = BreakIterator.DONE;
386 * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
388 public final boolean startsSentence() {
389 return fStartsSentence;