022cabf66fa6403d45e59ecd9ea7b292bd1fe377
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / text / spelling / SpellCheckIterator.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11
12 package net.sourceforge.phpdt.internal.ui.text.spelling;
13
14 import java.text.BreakIterator;
15 import java.util.LinkedList;
16 import java.util.Locale;
17
18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
23
24 import org.eclipse.jface.text.IDocument;
25 import org.eclipse.jface.text.IRegion;
26 import org.eclipse.jface.text.TextUtilities;
27
28 /**
29  * Iterator to spell-check javadoc comment regions.
30  * 
31  * @since 3.0
32  */
33 public class SpellCheckIterator implements ISpellCheckIterator,
34                 IJavaDocTagConstants, IHtmlTagConstants {
35
36         /** The content of the region */
37         private final String fContent;
38
39         /** The line delimiter */
40         private final String fDelimiter;
41
42         /** The last token */
43         private String fLastToken = null;
44
45         /** The next break */
46         private int fNext = 1;
47
48         /** The offset of the region */
49         private final int fOffset;
50
51         /** The predecessor break */
52         private int fPredecessor;
53
54         /** The previous break */
55         private int fPrevious = 0;
56
57         /** The sentence breaks */
58         private final LinkedList fSentenceBreaks = new LinkedList();
59
60         /** Does the current word start a sentence? */
61         private boolean fStartsSentence = false;
62
63         /** The successor break */
64         private int fSuccessor;
65
66         /** The word iterator */
67         private final BreakIterator fWordIterator;
68
69         /**
70          * Creates a new spell check iterator.
71          * 
72          * @param document
73          *            The document containing the specified partition
74          * @param region
75          *            The region to spell-check
76          * @param locale
77          *            The locale to use for spell-checking
78          */
79         public SpellCheckIterator(final IDocument document, final IRegion region,
80                         final Locale locale) {
81
82                 fOffset = region.getOffset();
83                 fWordIterator = BreakIterator.getWordInstance(locale);
84                 fDelimiter = TextUtilities.getDefaultLineDelimiter(document);
85
86                 String content;
87                 try {
88
89                         content = document.get(region.getOffset(), region.getLength());
90                         if (content.startsWith(NLSElement.TAG_PREFIX))
91                                 content = ""; //$NON-NLS-1$
92
93                 } catch (Exception exception) {
94                         content = ""; //$NON-NLS-1$
95                 }
96                 fContent = content;
97
98                 fWordIterator.setText(content);
99                 fPredecessor = fWordIterator.first();
100                 fSuccessor = fWordIterator.next();
101
102                 final BreakIterator iterator = BreakIterator
103                                 .getSentenceInstance(locale);
104                 iterator.setText(content);
105
106                 int offset = iterator.current();
107                 while (offset != BreakIterator.DONE) {
108
109                         fSentenceBreaks.add(new Integer(offset));
110                         offset = iterator.next();
111                 }
112         }
113
114         /*
115          * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
116          */
117         public final int getBegin() {
118                 return fPrevious + fOffset;
119         }
120
121         /*
122          * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
123          */
124         public final int getEnd() {
125                 return fNext + fOffset - 1;
126         }
127
128         /*
129          * @see java.util.Iterator#hasNext()
130          */
131         public final boolean hasNext() {
132                 return fSuccessor != BreakIterator.DONE;
133         }
134
135         /**
136          * Does the specified token consist of at least one letter and digits only?
137          * 
138          * @param begin
139          *            The begin index
140          * @param end
141          *            The end index
142          * @return <code>true</code> iff the token consists of digits and at least
143          *         one letter only, <code>false</code> otherwise
144          */
145         protected final boolean isAlphaNumeric(final int begin, final int end) {
146
147                 char character = 0;
148
149                 boolean letter = false;
150                 for (int index = begin; index < end; index++) {
151
152                         character = fContent.charAt(index);
153                         if (Character.isLetter(character))
154                                 letter = true;
155
156                         if (!Character.isLetterOrDigit(character))
157                                 return false;
158                 }
159                 return letter;
160         }
161
162         /**
163          * Was the last token a Javadoc tag tag?
164          * 
165          * @param tags
166          *            The javadoc tags to check
167          * @return <code>true</code> iff the last token was a Javadoc tag,
168          *         <code>false</code> otherwise
169          */
170         protected final boolean isJavadocToken(final String[] tags) {
171
172                 if (fLastToken != null) {
173
174                         for (int index = 0; index < tags.length; index++) {
175
176                                 if (fLastToken.equals(tags[index]))
177                                         return true;
178                         }
179                 }
180                 return false;
181         }
182
183         /**
184          * Is the current token a single letter token surrounded by non-whitespace
185          * characters?
186          * 
187          * @param begin
188          *            The begin index
189          * @return <code>true</code> iff the token is a single letter token,
190          *         <code>false</code> otherwise
191          */
192         protected final boolean isSingleLetter(final int begin) {
193
194                 if (begin > 0 && begin < fContent.length() - 1)
195                         return Character.isWhitespace(fContent.charAt(begin - 1))
196                                         && Character.isLetter(fContent.charAt(begin))
197                                         && Character.isWhitespace(fContent.charAt(begin + 1));
198
199                 return false;
200         }
201
202         /**
203          * Does the specified token look like an URL?
204          * 
205          * @param begin
206          *            The begin index
207          * @return <code>true</code> iff this token look like an URL,
208          *         <code>false</code> otherwise
209          */
210         protected final boolean isUrlToken(final int begin) {
211
212                 for (int index = 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
213
214                         if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index],
215                                         begin))
216                                 return true;
217                 }
218                 return false;
219         }
220
221         /**
222          * Does the specified token consist of whitespace only?
223          * 
224          * @param begin
225          *            The begin index
226          * @param end
227          *            The end index
228          * @return <code>true</code> iff the token consists of whitespace only,
229          *         <code>false</code> otherwise
230          */
231         protected final boolean isWhitespace(final int begin, final int end) {
232
233                 for (int index = begin; index < end; index++) {
234
235                         if (!Character.isWhitespace(fContent.charAt(index)))
236                                 return false;
237                 }
238                 return true;
239         }
240
241         /*
242          * @see java.util.Iterator#next()
243          */
244         public final Object next() {
245
246                 String token = nextToken();
247                 while (token == null && fSuccessor != BreakIterator.DONE)
248                         token = nextToken();
249
250                 fLastToken = token;
251
252                 return token;
253         }
254
255         /**
256          * Advances the end index to the next word break.
257          */
258         protected final void nextBreak() {
259
260                 fNext = fSuccessor;
261                 fPredecessor = fSuccessor;
262
263                 fSuccessor = fWordIterator.next();
264         }
265
266         /**
267          * Returns the next sentence break.
268          * 
269          * @return The next sentence break
270          */
271         protected final int nextSentence() {
272                 return ((Integer) fSentenceBreaks.getFirst()).intValue();
273         }
274
275         /**
276          * Determines the next token to be spell-checked.
277          * 
278          * @return The next token to be spell-checked, or <code>null</code> iff
279          *         the next token is not a candidate for spell-checking.
280          */
281         protected String nextToken() {
282
283                 String token = null;
284
285                 fPrevious = fPredecessor;
286                 fStartsSentence = false;
287
288                 nextBreak();
289
290                 boolean update = false;
291                 if (fNext - fPrevious > 0) {
292
293                         if (fSuccessor != BreakIterator.DONE
294                                         && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
295
296                                 nextBreak();
297                                 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
298                                         update = true;
299                                         token = fContent.substring(fPrevious, fNext);
300                                 } else
301                                         fPredecessor = fNext;
302
303                         } else if (fSuccessor != BreakIterator.DONE
304                                         && fContent.charAt(fPrevious) == HTML_TAG_PREFIX
305                                         && (Character.isLetter(fContent.charAt(fNext)) || fContent
306                                                         .charAt(fNext) == '/')) {
307
308                                 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
309                                         nextBreak();
310
311                                 nextBreak();
312
313                                 if (fSuccessor != BreakIterator.DONE
314                                                 && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
315
316                                         nextBreak();
317                                         if (fSuccessor != BreakIterator.DONE) {
318                                                 update = true;
319                                                 token = fContent.substring(fPrevious, fNext);
320                                         }
321                                 }
322                         } else if (!isWhitespace(fPrevious, fNext)
323                                         && isAlphaNumeric(fPrevious, fNext)) {
324
325                                 if (isUrlToken(fPrevious))
326                                         skipTokens(fPrevious, ' ');
327                                 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
328                                         fLastToken = null;
329                                 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
330                                         fLastToken = null;
331                                         skipTokens(fPrevious, fDelimiter.charAt(0));
332                                 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
333                                         token = fContent.substring(fPrevious, fNext);
334
335                                 update = true;
336                         }
337                 }
338
339                 if (update && fSentenceBreaks.size() > 0) {
340
341                         if (fPrevious >= nextSentence()) {
342
343                                 while (fSentenceBreaks.size() > 0
344                                                 && fPrevious >= nextSentence())
345                                         fSentenceBreaks.removeFirst();
346
347                                 fStartsSentence = (fLastToken == null) || (token != null);
348                         }
349                 }
350                 return token;
351         }
352
353         /*
354          * @see java.util.Iterator#remove()
355          */
356         public final void remove() {
357                 throw new UnsupportedOperationException();
358         }
359
360         /**
361          * Skip the tokens until the stop character is reached.
362          * 
363          * @param begin
364          *            The begin index
365          * @param stop
366          *            The stop character
367          */
368         protected final void skipTokens(final int begin, final char stop) {
369
370                 int end = begin;
371
372                 while (end < fContent.length() && fContent.charAt(end) != stop)
373                         end++;
374
375                 if (end < fContent.length()) {
376
377                         fNext = end;
378                         fPredecessor = fNext;
379
380                         fSuccessor = fWordIterator.following(fNext);
381                 } else
382                         fSuccessor = BreakIterator.DONE;
383         }
384
385         /*
386          * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
387          */
388         public final boolean startsSentence() {
389                 return fStartsSentence;
390         }
391 }