net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/ui/text/spelling/SpellCheckIterator.java

   1 /*******************************************************************************
   2  * Copyright (c) 2000, 2003 IBM Corporation and others.
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Common Public License v1.0
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/cpl-v10.html
   7  *
   8  * Contributors:
   9  *     IBM Corporation - initial API and implementation
  10  *******************************************************************************/
  11
  12 package net.sourceforge.phpdt.internal.ui.text.spelling;
  13
  14 import java.text.BreakIterator;
  15 import java.util.LinkedList;
  16 import java.util.Locale;
  17
  18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
  19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
  20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
  21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
  22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
  23
  24 import org.eclipse.jface.text.IDocument;
  25 import org.eclipse.jface.text.IRegion;
  26 import org.eclipse.jface.text.TextUtilities;
  27
  28 /**
  29  * Iterator to spell-check javadoc comment regions.
  30  *
  31  * @since 3.0
  32  */
  33 public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {
  34
  35         /** The content of the region */
  36         private final String fContent;
  37
  38         /** The line delimiter */
  39         private final String fDelimiter;
  40
  41         /** The last token */
  42         private String fLastToken= null;
  43
  44         /** The next break */
  45         private int fNext= 1;
  46
  47         /** The offset of the region */
  48         private final int fOffset;
  49
  50         /** The predecessor break */
  51         private int fPredecessor;
  52
  53         /** The previous break */
  54         private int fPrevious= 0;
  55
  56         /** The sentence breaks */
  57         private final LinkedList fSentenceBreaks= new LinkedList();
  58
  59         /** Does the current word start a sentence? */
  60         private boolean fStartsSentence= false;
  61
  62         /** The successor break */
  63         private int fSuccessor;
  64
  65         /** The word iterator */
  66         private final BreakIterator fWordIterator;
  67
  68         /**
  69          * Creates a new spell check iterator.
  70          *
  71          * @param document
  72          *                   The document containing the specified partition
  73          * @param region
  74          *                   The region to spell-check
  75          * @param locale
  76          *                   The locale to use for spell-checking
  77          */
  78         public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) {
  79
  80                 fOffset= region.getOffset();
  81                 fWordIterator= BreakIterator.getWordInstance(locale);
  82                 fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
  83
  84                 String content;
  85                 try {
  86
  87                         content= document.get(region.getOffset(), region.getLength());
  88                         if (content.startsWith(NLSElement.TAG_PREFIX))
  89                                 content= ""; //$NON-NLS-1$
  90
  91                 } catch (Exception exception) {
  92                         content= ""; //$NON-NLS-1$
  93                 }
  94                 fContent= content;
  95
  96                 fWordIterator.setText(content);
  97                 fPredecessor= fWordIterator.first();
  98                 fSuccessor= fWordIterator.next();
  99
 100                 final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
 101                 iterator.setText(content);
 102
 103                 int offset= iterator.current();
 104                 while (offset != BreakIterator.DONE) {
 105
 106                         fSentenceBreaks.add(new Integer(offset));
 107                         offset= iterator.next();
 108                 }
 109         }
 110
 111         /*
 112          * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
 113          */
 114         public final int getBegin() {
 115                 return fPrevious + fOffset;
 116         }
 117
 118         /*
 119          * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
 120          */
 121         public final int getEnd() {
 122                 return fNext + fOffset - 1;
 123         }
 124
 125         /*
 126          * @see java.util.Iterator#hasNext()
 127          */
 128         public final boolean hasNext() {
 129                 return fSuccessor != BreakIterator.DONE;
 130         }
 131
 132         /**
 133          * Does the specified token consist of at least one letter and digits only?
 134          *
 135          * @param begin
 136          *                   The begin index
 137          * @param end
 138          *                   The end index
 139          * @return <code>true</code> iff the token consists of digits and at
 140          *               least one letter only, <code>false</code> otherwise
 141          */
 142         protected final boolean isAlphaNumeric(final int begin, final int end) {
 143
 144                 char character= 0;
 145
 146                 boolean letter= false;
 147                 for (int index= begin; index < end; index++) {
 148
 149                         character= fContent.charAt(index);
 150                         if (Character.isLetter(character))
 151                                 letter= true;
 152
 153                         if (!Character.isLetterOrDigit(character))
 154                                 return false;
 155                 }
 156                 return letter;
 157         }
 158
 159         /**
 160          * Was the last token a Javadoc tag tag?
 161          *
 162          * @param tags
 163          *                   The javadoc tags to check
 164          * @return <code>true</code> iff the last token was a Javadoc tag, <code>false</code>
 165          *               otherwise
 166          */
 167         protected final boolean isJavadocToken(final String[] tags) {
 168
 169                 if (fLastToken != null) {
 170
 171                         for (int index= 0; index < tags.length; index++) {
 172
 173                                 if (fLastToken.equals(tags[index]))
 174                                         return true;
 175                         }
 176                 }
 177                 return false;
 178         }
 179
 180         /**
 181          * Is the current token a single letter token surrounded by non-whitespace
 182          * characters?
 183          *
 184          * @param begin
 185          *                   The begin index
 186          * @return <code>true</code> iff the token is a single letter token,
 187          *               <code>false</code> otherwise
 188          */
 189         protected final boolean isSingleLetter(final int begin) {
 190
 191                 if (begin > 0 && begin < fContent.length() - 1)
 192                         return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1));
 193
 194                 return false;
 195         }
 196
 197         /**
 198          * Does the specified token look like an URL?
 199          *
 200          * @param begin
 201          *                   The begin index
 202          * @return <code>true</code> iff this token look like an URL, <code>false</code>
 203          *               otherwise
 204          */
 205         protected final boolean isUrlToken(final int begin) {
 206
 207                 for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
 208
 209                         if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
 210                                 return true;
 211                 }
 212                 return false;
 213         }
 214
 215         /**
 216          * Does the specified token consist of whitespace only?
 217          *
 218          * @param begin
 219          *                   The begin index
 220          * @param end
 221          *                   The end index
 222          * @return <code>true</code> iff the token consists of whitespace only,
 223          *               <code>false</code> otherwise
 224          */
 225         protected final boolean isWhitespace(final int begin, final int end) {
 226
 227                 for (int index= begin; index < end; index++) {
 228
 229                         if (!Character.isWhitespace(fContent.charAt(index)))
 230                                 return false;
 231                 }
 232                 return true;
 233         }
 234
 235         /*
 236          * @see java.util.Iterator#next()
 237          */
 238         public final Object next() {
 239
 240                 String token= nextToken();
 241                 while (token == null && fSuccessor != BreakIterator.DONE)
 242                         token= nextToken();
 243
 244                 fLastToken= token;
 245
 246                 return token;
 247         }
 248
 249         /**
 250          * Advances the end index to the next word break.
 251          */
 252         protected final void nextBreak() {
 253
 254                 fNext= fSuccessor;
 255                 fPredecessor= fSuccessor;
 256
 257                 fSuccessor= fWordIterator.next();
 258         }
 259
 260         /**
 261          * Returns the next sentence break.
 262          *
 263          * @return The next sentence break
 264          */
 265         protected final int nextSentence() {
 266                 return ((Integer) fSentenceBreaks.getFirst()).intValue();
 267         }
 268
 269         /**
 270          * Determines the next token to be spell-checked.
 271          *
 272          * @return The next token to be spell-checked, or <code>null</code> iff
 273          *               the next token is not a candidate for spell-checking.
 274          */
 275         protected String nextToken() {
 276
 277                 String token= null;
 278
 279                 fPrevious= fPredecessor;
 280                 fStartsSentence= false;
 281
 282                 nextBreak();
 283
 284                 boolean update= false;
 285                 if (fNext - fPrevious > 0) {
 286
 287                         if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
 288
 289                                 nextBreak();
 290                                 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
 291                                         update= true;
 292                                         token= fContent.substring(fPrevious, fNext);
 293                                 } else
 294                                         fPredecessor= fNext;
 295
 296                         } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {
 297
 298                                 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
 299                                         nextBreak();
 300
 301                                 nextBreak();
 302
 303                                 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
 304
 305                                         nextBreak();
 306                                         if (fSuccessor != BreakIterator.DONE) {
 307                                                 update= true;
 308                                                 token= fContent.substring(fPrevious, fNext);
 309                                         }
 310                                 }
 311                         } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {
 312
 313                                 if (isUrlToken(fPrevious))
 314                                         skipTokens(fPrevious, ' ');
 315                                 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
 316                                         fLastToken= null;
 317                                 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
 318                                         fLastToken= null;
 319                                         skipTokens(fPrevious, fDelimiter.charAt(0));
 320                                 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
 321                                         token= fContent.substring(fPrevious, fNext);
 322
 323                                 update= true;
 324                         }
 325                 }
 326
 327                 if (update && fSentenceBreaks.size() > 0) {
 328
 329                         if (fPrevious >= nextSentence()) {
 330
 331                                 while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
 332                                         fSentenceBreaks.removeFirst();
 333
 334                                 fStartsSentence= (fLastToken == null) || (token != null);
 335                         }
 336                 }
 337                 return token;
 338         }
 339
 340         /*
 341          * @see java.util.Iterator#remove()
 342          */
 343         public final void remove() {
 344                 throw new UnsupportedOperationException();
 345         }
 346
 347         /**
 348          * Skip the tokens until the stop character is reached.
 349          *
 350          * @param begin
 351          *                   The begin index
 352          * @param stop
 353          *                   The stop character
 354          */
 355         protected final void skipTokens(final int begin, final char stop) {
 356
 357                 int end= begin;
 358
 359                 while (end < fContent.length() && fContent.charAt(end) != stop)
 360                         end++;
 361
 362                 if (end < fContent.length()) {
 363
 364                         fNext= end;
 365                         fPredecessor= fNext;
 366
 367                         fSuccessor= fWordIterator.following(fNext);
 368                 } else
 369                         fSuccessor= BreakIterator.DONE;
 370         }
 371
 372         /*
 373          * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
 374          */
 375         public final boolean startsSentence() {
 376                 return fStartsSentence;
 377         }
 378 }