net.sourceforge.phpeclipse.ui/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2003 IBM Corporation and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v1.0
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v10.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  *******************************************************************************/
  11
  12 package net.sourceforge.phpdt.internal.ui.text.spelling;
  13
  14 import java.text.BreakIterator;
  15 import java.util.LinkedList;
  16 import java.util.Locale;
  17
  18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
  19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
  20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
  21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
  22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
  23
  24 import org.eclipse.jface.text.IDocument;
  25 import org.eclipse.jface.text.IRegion;
  26 import org.eclipse.jface.text.TextUtilities;
  27
  28 /**
  29  * Iterator to spell-check javadoc comment regions.
  30  *
  31  * @since 3.0
  32  */
  33 public class SpellCheckIterator implements ISpellCheckIterator,
  34                 IJavaDocTagConstants, IHtmlTagConstants {
  35
  36         /** The content of the region */
  37         private final String fContent;
  38
  39         /** The line delimiter */
  40         private final String fDelimiter;
  41
  42         /** The last token */
  43         private String fLastToken = null;
  44
  45         /** The next break */
  46         private int fNext = 1;
  47
  48         /** The offset of the region */
  49         private final int fOffset;
  50
  51         /** The predecessor break */
  52         private int fPredecessor;
  53
  54         /** The previous break */
  55         private int fPrevious = 0;
  56
  57         /** The sentence breaks */
  58         private final LinkedList fSentenceBreaks = new LinkedList();
  59
  60         /** Does the current word start a sentence? */
  61         private boolean fStartsSentence = false;
  62
  63         /** The successor break */
  64         private int fSuccessor;
  65
  66         /** The word iterator */
  67         private final BreakIterator fWordIterator;
  68
  69         /**
  70          * Creates a new spell check iterator.
  71          *
  72          * @param document
  73          *            The document containing the specified partition
  74          * @param region
  75          *            The region to spell-check
  76          * @param locale
  77          *            The locale to use for spell-checking
  78          */
  79         public SpellCheckIterator(final IDocument document, final IRegion region,
  80                         final Locale locale) {
  81
  82                 fOffset = region.getOffset();
  83                 fWordIterator = BreakIterator.getWordInstance(locale);
  84                 fDelimiter = TextUtilities.getDefaultLineDelimiter(document);
  85
  86                 String content;
  87                 try {
  88
  89                         content = document.get(region.getOffset(), region.getLength());
  90                         if (content.startsWith(NLSElement.TAG_PREFIX))
  91                                 content = ""; //$NON-NLS-1$
  92
  93                 } catch (Exception exception) {
  94                         content = ""; //$NON-NLS-1$
  95                 }
  96                 fContent = content;
  97
  98                 fWordIterator.setText(content);
  99                 fPredecessor = fWordIterator.first();
 100                 fSuccessor = fWordIterator.next();
 101
 102                 final BreakIterator iterator = BreakIterator
 103                                 .getSentenceInstance(locale);
 104                 iterator.setText(content);
 105
 106                 int offset = iterator.current();
 107                 while (offset != BreakIterator.DONE) {
 108
 109                         fSentenceBreaks.add(new Integer(offset));
 110                         offset = iterator.next();
 111                 }
 112         }
 113
 114         /*
 115          * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
 116          */
 117         public final int getBegin() {
 118                 return fPrevious + fOffset;
 119         }
 120
 121         /*
 122          * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
 123          */
 124         public final int getEnd() {
 125                 return fNext + fOffset - 1;
 126         }
 127
 128         /*
 129          * @see java.util.Iterator#hasNext()
 130          */
 131         public final boolean hasNext() {
 132                 return fSuccessor != BreakIterator.DONE;
 133         }
 134
 135         /**
 136          * Does the specified token consist of at least one letter and digits only?
 137          *
 138          * @param begin
 139          *            The begin index
 140          * @param end
 141          *            The end index
 142          * @return <code>true</code> iff the token consists of digits and at least
 143          *         one letter only, <code>false</code> otherwise
 144          */
 145         protected final boolean isAlphaNumeric(final int begin, final int end) {
 146
 147                 char character = 0;
 148
 149                 boolean letter = false;
 150                 for (int index = begin; index < end; index++) {
 151
 152                         character = fContent.charAt(index);
 153                         if (Character.isLetter(character))
 154                                 letter = true;
 155
 156                         if (!Character.isLetterOrDigit(character))
 157                                 return false;
 158                 }
 159                 return letter;
 160         }
 161
 162         /**
 163          * Was the last token a Javadoc tag tag?
 164          *
 165          * @param tags
 166          *            The javadoc tags to check
 167          * @return <code>true</code> iff the last token was a Javadoc tag,
 168          *         <code>false</code> otherwise
 169          */
 170         protected final boolean isJavadocToken(final String[] tags) {
 171
 172                 if (fLastToken != null) {
 173
 174                         for (int index = 0; index < tags.length; index++) {
 175
 176                                 if (fLastToken.equals(tags[index]))
 177                                         return true;
 178                         }
 179                 }
 180                 return false;
 181         }
 182
 183         /**
 184          * Is the current token a single letter token surrounded by non-whitespace
 185          * characters?
 186          *
 187          * @param begin
 188          *            The begin index
 189          * @return <code>true</code> iff the token is a single letter token,
 190          *         <code>false</code> otherwise
 191          */
 192         protected final boolean isSingleLetter(final int begin) {
 193
 194                 if (begin > 0 && begin < fContent.length() - 1)
 195                         return Character.isWhitespace(fContent.charAt(begin - 1))
 196                                         && Character.isLetter(fContent.charAt(begin))
 197                                         && Character.isWhitespace(fContent.charAt(begin + 1));
 198
 199                 return false;
 200         }
 201
 202         /**
 203          * Does the specified token look like an URL?
 204          *
 205          * @param begin
 206          *            The begin index
 207          * @return <code>true</code> iff this token look like an URL,
 208          *         <code>false</code> otherwise
 209          */
 210         protected final boolean isUrlToken(final int begin) {
 211
 212                 for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
 213
 214                         if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index],
 215                                         begin))
 216                                 return true;
 217                 }
 218                 return false;
 219         }
 220
 221         /**
 222          * Does the specified token consist of whitespace only?
 223          *
 224          * @param begin
 225          *            The begin index
 226          * @param end
 227          *            The end index
 228          * @return <code>true</code> iff the token consists of whitespace only,
 229          *         <code>false</code> otherwise
 230          */
 231         protected final boolean isWhitespace(final int begin, final int end) {
 232
 233                 for (int index = begin; index < end; index++) {
 234
 235                         if (!Character.isWhitespace(fContent.charAt(index)))
 236                                 return false;
 237                 }
 238                 return true;
 239         }
 240
 241         /*
 242          * @see java.util.Iterator#next()
 243          */
 244         public final Object next() {
 245
 246                 String token = nextToken();
 247                 while (token == null && fSuccessor != BreakIterator.DONE)
 248                         token = nextToken();
 249
 250                 fLastToken = token;
 251
 252                 return token;
 253         }
 254
 255         /**
 256          * Advances the end index to the next word break.
 257          */
 258         protected final void nextBreak() {
 259
 260                 fNext = fSuccessor;
 261                 fPredecessor = fSuccessor;
 262
 263                 fSuccessor = fWordIterator.next();
 264         }
 265
 266         /**
 267          * Returns the next sentence break.
 268          *
 269          * @return The next sentence break
 270          */
 271         protected final int nextSentence() {
 272                 return ((Integer) fSentenceBreaks.getFirst()).intValue();
 273         }
 274
 275         /**
 276          * Determines the next token to be spell-checked.
 277          *
 278          * @return The next token to be spell-checked, or <code>null</code> iff
 279          *         the next token is not a candidate for spell-checking.
 280          */
 281         protected String nextToken() {
 282
 283                 String token = null;
 284
 285                 fPrevious = fPredecessor;
 286                 fStartsSentence = false;
 287
 288                 nextBreak();
 289
 290                 boolean update = false;
 291                 if (fNext - fPrevious > 0) {
 292
 293                         if (fSuccessor != BreakIterator.DONE
 294                                         && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
 295
 296                                 nextBreak();
 297                                 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
 298                                         update = true;
 299                                         token = fContent.substring(fPrevious, fNext);
 300                                 } else
 301                                         fPredecessor = fNext;
 302
 303                         } else if (fSuccessor != BreakIterator.DONE
 304                                         && fContent.charAt(fPrevious) == HTML_TAG_PREFIX
 305                                         && (Character.isLetter(fContent.charAt(fNext)) || fContent
 306                                                         .charAt(fNext) == '/')) {
 307
 308                                 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
 309                                         nextBreak();
 310
 311                                 nextBreak();
 312
 313                                 if (fSuccessor != BreakIterator.DONE
 314                                                 && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
 315
 316                                         nextBreak();
 317                                         if (fSuccessor != BreakIterator.DONE) {
 318                                                 update = true;
 319                                                 token = fContent.substring(fPrevious, fNext);
 320                                         }
 321                                 }
 322                         } else if (!isWhitespace(fPrevious, fNext)
 323                                         && isAlphaNumeric(fPrevious, fNext)) {
 324
 325                                 if (isUrlToken(fPrevious))
 326                                         skipTokens(fPrevious, ' ');
 327                                 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
 328                                         fLastToken = null;
 329                                 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
 330                                         fLastToken = null;
 331                                         skipTokens(fPrevious, fDelimiter.charAt(0));
 332                                 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
 333                                         token = fContent.substring(fPrevious, fNext);
 334
 335                                 update = true;
 336                         }
 337                 }
 338
 339                 if (update && fSentenceBreaks.size() > 0) {
 340
 341                         if (fPrevious >= nextSentence()) {
 342
 343                                 while (fSentenceBreaks.size() > 0
 344                                                 && fPrevious >= nextSentence())
 345                                         fSentenceBreaks.removeFirst();
 346
 347                                 fStartsSentence = (fLastToken == null) || (token != null);
 348                         }
 349                 }
 350                 return token;
 351         }
 352
 353         /*
 354          * @see java.util.Iterator#remove()
 355          */
 356         public final void remove() {
 357                 throw new UnsupportedOperationException();
 358         }
 359
 360         /**
 361          * Skip the tokens until the stop character is reached.
 362          *
 363          * @param begin
 364          *            The begin index
 365          * @param stop
 366          *            The stop character
 367          */
 368         protected final void skipTokens(final int begin, final char stop) {
 369
 370                 int end = begin;
 371
 372                 while (end < fContent.length() && fContent.charAt(end) != stop)
 373                         end++;
 374
 375                 if (end < fContent.length()) {
 376
 377                         fNext = end;
 378                         fPredecessor = fNext;
 379
 380                         fSuccessor = fWordIterator.following(fNext);
 381                 } else
 382                         fSuccessor = BreakIterator.DONE;
 383         }
 384
 385         /*
 386          * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
 387          */
 388         public final boolean startsSentence() {
 389                 return fStartsSentence;
 390         }
 391 }