RC2 compatibility
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / spelling / SpellCheckIterator.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11
12 package net.sourceforge.phpdt.internal.ui.text.spelling;
13
14 import java.text.BreakIterator;
15 import java.util.LinkedList;
16 import java.util.Locale;
17
18 import net.sourceforge.phpdt.corext.refactoring.nls.NLSElement;
19 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IHtmlTagConstants;
20 import net.sourceforge.phpdt.internal.ui.text.phpdoc.IJavaDocTagConstants;
21 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
22 import net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
23
24 import org.eclipse.jface.text.IDocument;
25 import org.eclipse.jface.text.IRegion;
26 import org.eclipse.jface.text.TextUtilities;
27
28 /**
29  * Iterator to spell-check javadoc comment regions.
30  * 
31  * @since 3.0
32  */
33 public class SpellCheckIterator implements ISpellCheckIterator, IJavaDocTagConstants, IHtmlTagConstants {
34
35         /** The content of the region */
36         private final String fContent;
37
38         /** The line delimiter */
39         private final String fDelimiter;
40
41         /** The last token */
42         private String fLastToken= null;
43
44         /** The next break */
45         private int fNext= 1;
46
47         /** The offset of the region */
48         private final int fOffset;
49
50         /** The predecessor break */
51         private int fPredecessor;
52
53         /** The previous break */
54         private int fPrevious= 0;
55
56         /** The sentence breaks */
57         private final LinkedList fSentenceBreaks= new LinkedList();
58
59         /** Does the current word start a sentence? */
60         private boolean fStartsSentence= false;
61
62         /** The successor break */
63         private int fSuccessor;
64
65         /** The word iterator */
66         private final BreakIterator fWordIterator;
67
68         /**
69          * Creates a new spell check iterator.
70          * 
71          * @param document
72          *                   The document containing the specified partition
73          * @param region
74          *                   The region to spell-check
75          * @param locale
76          *                   The locale to use for spell-checking
77          */
78         public SpellCheckIterator(final IDocument document, final IRegion region, final Locale locale) {
79
80                 fOffset= region.getOffset();
81                 fWordIterator= BreakIterator.getWordInstance(locale);
82                 fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
83
84                 String content;
85                 try {
86
87                         content= document.get(region.getOffset(), region.getLength());
88                         if (content.startsWith(NLSElement.TAG_PREFIX))
89                                 content= ""; //$NON-NLS-1$
90
91                 } catch (Exception exception) {
92                         content= ""; //$NON-NLS-1$
93                 }
94                 fContent= content;
95
96                 fWordIterator.setText(content);
97                 fPredecessor= fWordIterator.first();
98                 fSuccessor= fWordIterator.next();
99
100                 final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
101                 iterator.setText(content);
102
103                 int offset= iterator.current();
104                 while (offset != BreakIterator.DONE) {
105
106                         fSentenceBreaks.add(new Integer(offset));
107                         offset= iterator.next();
108                 }
109         }
110
111         /*
112          * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
113          */
114         public final int getBegin() {
115                 return fPrevious + fOffset;
116         }
117
118         /*
119          * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
120          */
121         public final int getEnd() {
122                 return fNext + fOffset - 1;
123         }
124
125         /*
126          * @see java.util.Iterator#hasNext()
127          */
128         public final boolean hasNext() {
129                 return fSuccessor != BreakIterator.DONE;
130         }
131
132         /**
133          * Does the specified token consist of at least one letter and digits only?
134          * 
135          * @param begin
136          *                   The begin index
137          * @param end
138          *                   The end index
139          * @return <code>true</code> iff the token consists of digits and at
140          *               least one letter only, <code>false</code> otherwise
141          */
142         protected final boolean isAlphaNumeric(final int begin, final int end) {
143
144                 char character= 0;
145
146                 boolean letter= false;
147                 for (int index= begin; index < end; index++) {
148
149                         character= fContent.charAt(index);
150                         if (Character.isLetter(character))
151                                 letter= true;
152
153                         if (!Character.isLetterOrDigit(character))
154                                 return false;
155                 }
156                 return letter;
157         }
158
159         /**
160          * Was the last token a Javadoc tag tag?
161          * 
162          * @param tags
163          *                   The javadoc tags to check
164          * @return <code>true</code> iff the last token was a Javadoc tag, <code>false</code>
165          *               otherwise
166          */
167         protected final boolean isJavadocToken(final String[] tags) {
168
169                 if (fLastToken != null) {
170
171                         for (int index= 0; index < tags.length; index++) {
172
173                                 if (fLastToken.equals(tags[index]))
174                                         return true;
175                         }
176                 }
177                 return false;
178         }
179
180         /**
181          * Is the current token a single letter token surrounded by non-whitespace
182          * characters?
183          * 
184          * @param begin
185          *                   The begin index
186          * @return <code>true</code> iff the token is a single letter token,
187          *               <code>false</code> otherwise
188          */
189         protected final boolean isSingleLetter(final int begin) {
190
191                 if (begin > 0 && begin < fContent.length() - 1)
192                         return Character.isWhitespace(fContent.charAt(begin - 1)) && Character.isLetter(fContent.charAt(begin)) && Character.isWhitespace(fContent.charAt(begin + 1));
193
194                 return false;
195         }
196
197         /**
198          * Does the specified token look like an URL?
199          * 
200          * @param begin
201          *                   The begin index
202          * @return <code>true</code> iff this token look like an URL, <code>false</code>
203          *               otherwise
204          */
205         protected final boolean isUrlToken(final int begin) {
206
207                 for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
208
209                         if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
210                                 return true;
211                 }
212                 return false;
213         }
214
215         /**
216          * Does the specified token consist of whitespace only?
217          * 
218          * @param begin
219          *                   The begin index
220          * @param end
221          *                   The end index
222          * @return <code>true</code> iff the token consists of whitespace only,
223          *               <code>false</code> otherwise
224          */
225         protected final boolean isWhitespace(final int begin, final int end) {
226
227                 for (int index= begin; index < end; index++) {
228
229                         if (!Character.isWhitespace(fContent.charAt(index)))
230                                 return false;
231                 }
232                 return true;
233         }
234
235         /*
236          * @see java.util.Iterator#next()
237          */
238         public final Object next() {
239
240                 String token= nextToken();
241                 while (token == null && fSuccessor != BreakIterator.DONE)
242                         token= nextToken();
243
244                 fLastToken= token;
245
246                 return token;
247         }
248
249         /**
250          * Advances the end index to the next word break.
251          */
252         protected final void nextBreak() {
253
254                 fNext= fSuccessor;
255                 fPredecessor= fSuccessor;
256
257                 fSuccessor= fWordIterator.next();
258         }
259
260         /**
261          * Returns the next sentence break.
262          * 
263          * @return The next sentence break
264          */
265         protected final int nextSentence() {
266                 return ((Integer) fSentenceBreaks.getFirst()).intValue();
267         }
268
269         /**
270          * Determines the next token to be spell-checked.
271          * 
272          * @return The next token to be spell-checked, or <code>null</code> iff
273          *               the next token is not a candidate for spell-checking.
274          */
275         protected String nextToken() {
276
277                 String token= null;
278
279                 fPrevious= fPredecessor;
280                 fStartsSentence= false;
281
282                 nextBreak();
283
284                 boolean update= false;
285                 if (fNext - fPrevious > 0) {
286
287                         if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == JAVADOC_TAG_PREFIX) {
288
289                                 nextBreak();
290                                 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
291                                         update= true;
292                                         token= fContent.substring(fPrevious, fNext);
293                                 } else
294                                         fPredecessor= fNext;
295
296                         } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {
297
298                                 if (fContent.startsWith(HTML_CLOSE_PREFIX, fPrevious))
299                                         nextBreak();
300
301                                 nextBreak();
302
303                                 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == HTML_TAG_POSTFIX) {
304
305                                         nextBreak();
306                                         if (fSuccessor != BreakIterator.DONE) {
307                                                 update= true;
308                                                 token= fContent.substring(fPrevious, fNext);
309                                         }
310                                 }
311                         } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {
312
313                                 if (isUrlToken(fPrevious))
314                                         skipTokens(fPrevious, ' ');
315                                 else if (isJavadocToken(JAVADOC_PARAM_TAGS))
316                                         fLastToken= null;
317                                 else if (isJavadocToken(JAVADOC_REFERENCE_TAGS)) {
318                                         fLastToken= null;
319                                         skipTokens(fPrevious, fDelimiter.charAt(0));
320                                 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious))
321                                         token= fContent.substring(fPrevious, fNext);
322
323                                 update= true;
324                         }
325                 }
326
327                 if (update && fSentenceBreaks.size() > 0) {
328
329                         if (fPrevious >= nextSentence()) {
330
331                                 while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
332                                         fSentenceBreaks.removeFirst();
333
334                                 fStartsSentence= (fLastToken == null) || (token != null);
335                         }
336                 }
337                 return token;
338         }
339
340         /*
341          * @see java.util.Iterator#remove()
342          */
343         public final void remove() {
344                 throw new UnsupportedOperationException();
345         }
346
347         /**
348          * Skip the tokens until the stop character is reached.
349          * 
350          * @param begin
351          *                   The begin index
352          * @param stop
353          *                   The stop character
354          */
355         protected final void skipTokens(final int begin, final char stop) {
356
357                 int end= begin;
358
359                 while (end < fContent.length() && fContent.charAt(end) != stop)
360                         end++;
361
362                 if (end < fContent.length()) {
363
364                         fNext= end;
365                         fPredecessor= fNext;
366
367                         fSuccessor= fWordIterator.following(fNext);
368                 } else
369                         fSuccessor= BreakIterator.DONE;
370         }
371
372         /*
373          * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
374          */
375         public final boolean startsSentence() {
376                 return fStartsSentence;
377         }
378 }