improved PHP parser
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / spelling / engine / AbstractSpellDictionary.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11
12 package net.sourceforge.phpdt.internal.ui.text.spelling.engine;
13
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.InputStreamReader;
18 import java.net.MalformedURLException;
19 import java.net.URL;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Set;
26
27 /**
28  * Partial implementation of a spell dictionary.
29  * 
30  * @since 3.0
31  */
32 public abstract class AbstractSpellDictionary implements ISpellDictionary {
33
34         /** The bucket capacity */
35         protected static final int BUCKET_CAPACITY= 4;
36
37         /** The word buffer capacity */
38         protected static final int BUFFER_CAPACITY= 32;
39
40         /** The distance threshold */
41         protected static final int DISTANCE_THRESHOLD= 160;
42
43         /** The hash capacity */
44         protected static final int HASH_CAPACITY= 22 * 1024;
45
46         /** The phonetic distance algorithm */
47         private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();
48
49         /** The mapping from phonetic hashes to word lists */
50         private final Map fHashBuckets= new HashMap(HASH_CAPACITY);
51
52         /** The phonetic hash provider */
53         private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();
54
55         /** Is the dictionary already loaded? */
56         private boolean fLoaded= false;
57
58         /**
59          * Returns all candidates with the same phonetic hash.
60          * 
61          * @param hash
62          *                   The hash to retrieve the candidates of
63          * @return Array of candidates for the phonetic hash
64          */
65         protected final ArrayList getCandidates(final String hash) {
66
67                 ArrayList list= (ArrayList)fHashBuckets.get(hash);
68                 if (list == null)
69                         list= new ArrayList(0);
70
71                 return list;
72         }
73
74         /**
75          * Returns all candidates that have a phonetic hash within a bounded
76          * distance to the specified word.
77          * 
78          * @param word
79          *                   The word to find the nearest matches for
80          * @param sentence
81          *                   <code>true</code> iff the proposals start a new sentence,
82          *                   <code>false</code> otherwise
83          * @param hashs
84          *                   Array of close hashes to find the matches
85          * @return Set of ranked words with bounded distance to the specified word
86          */
87         protected final HashSet getCandidates(final String word, final boolean sentence, final ArrayList hashs) {
88
89                 int distance= 0;
90                 String hash= null;
91
92                 String candidate= null;
93                 List candidates= null;
94
95                 final StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
96                 final HashSet result= new HashSet(BUCKET_CAPACITY * hashs.size());
97
98                 for (int index= 0; index < hashs.size(); index++) {
99
100                         hash= (String)hashs.get(index);
101                         candidates= getCandidates(hash);
102
103                         for (int offset= 0; offset < candidates.size(); offset++) {
104
105                                 candidate= (String)candidates.get(offset);
106                                 distance= fDistanceAlgorithm.getDistance(word, candidate);
107
108                                 if (distance < DISTANCE_THRESHOLD) {
109
110                                         buffer.setLength(0);
111                                         buffer.append(candidate);
112
113                                         if (sentence)
114                                                 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
115
116                                         result.add(new RankedWordProposal(buffer.toString(), -distance));
117                                 }
118                         }
119                 }
120                 return result;
121         }
122
123         /**
124          * Returns all approximations that have a phonetic hash with smallest
125          * possible distance to the specified word.
126          * 
127          * @param word
128          *                   The word to find the nearest matches for
129          * @param sentence
130          *                   <code>true</code> iff the proposals start a new sentence,
131          *                   <code>false</code> otherwise
132          * @param result
133          *                   Set of ranked words with smallest possible distance to the
134          *                   specified word
135          */
136         protected final void getCandidates(final String word, final boolean sentence, final HashSet result) {
137
138                 int distance= 0;
139                 int minimum= Integer.MAX_VALUE;
140
141                 String candidate= null;
142                 StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
143
144                 final ArrayList candidates= getCandidates(fHashProvider.getHash(word));
145                 final ArrayList matches= new ArrayList(candidates.size());
146
147                 for (int index= 0; index < candidates.size(); index++) {
148
149                         candidate= (String)candidates.get(index);
150                         distance= fDistanceAlgorithm.getDistance(word, candidate);
151
152                         if (distance <= minimum) {
153
154                                 buffer.setLength(0);
155                                 buffer.append(candidate);
156
157                                 if (sentence)
158                                         buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
159
160                                 matches.add(new RankedWordProposal(buffer.toString(), -distance));
161                                 minimum= distance;
162                         }
163                 }
164
165                 RankedWordProposal match= null;
166
167                 for (int index= 0; index < matches.size(); index++) {
168
169                         match= (RankedWordProposal)matches.get(index);
170                         if (match.getRank() == minimum)
171                                 result.add(match);
172                 }
173         }
174
175         /**
176          * Returns the used phonetic distance algorithm.
177          * 
178          * @return The phonetic distance algorithm
179          */
180         protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
181                 return fDistanceAlgorithm;
182         }
183
184         /**
185          * Returns the used phonetic hash provider.
186          * 
187          * @return The phonetic hash provider
188          */
189         protected final IPhoneticHashProvider getHashProvider() {
190                 return fHashProvider;
191         }
192
193         /*
194          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
195          */
196         public Set getProposals(final String word, final boolean sentence) {
197
198                 try {
199
200                         if (!fLoaded)
201                                 load(getURL());
202
203                 } catch (MalformedURLException exception) {
204                         // Do nothing
205                 }
206
207                 final String hash= fHashProvider.getHash(word);
208                 final char[] mutators= fHashProvider.getMutators();
209
210                 final ArrayList neighborhood= new ArrayList((word.length() + 1) * (mutators.length + 2));
211                 neighborhood.add(hash);
212
213                 final HashSet candidates= getCandidates(word, sentence, neighborhood);
214                 neighborhood.clear();
215
216                 char previous= 0;
217                 char next= 0;
218
219                 char[] characters= word.toCharArray();
220                 for (int index= 0; index < word.length() - 1; index++) {
221
222                         next= characters[index];
223                         previous= characters[index + 1];
224
225                         characters[index]= previous;
226                         characters[index + 1]= next;
227
228                         neighborhood.add(fHashProvider.getHash(new String(characters)));
229
230                         characters[index]= next;
231                         characters[index + 1]= previous;
232                 }
233
234                 final String sentinel= word + " "; //$NON-NLS-1$
235
236                 characters= sentinel.toCharArray();
237                 int offset= characters.length - 1;
238
239                 while (true) {
240
241                         for (int index= 0; index < mutators.length; index++) {
242
243                                 characters[offset]= mutators[index];
244                                 neighborhood.add(fHashProvider.getHash(new String(characters)));
245                         }
246
247                         if (offset == 0)
248                                 break;
249
250                         characters[offset]= characters[offset - 1];
251                         --offset;
252                 }
253
254                 char mutated= 0;
255                 characters= word.toCharArray();
256
257                 for (int index= 0; index < word.length(); index++) {
258
259                         mutated= characters[index];
260                         for (int mutator= 0; mutator < mutators.length; mutator++) {
261
262                                 characters[index]= mutators[mutator];
263                                 neighborhood.add(fHashProvider.getHash(new String(characters)));
264                         }
265                         characters[index]= mutated;
266                 }
267
268                 characters= word.toCharArray();
269                 final char[] deleted= new char[characters.length - 1];
270
271                 for (int index= 0; index < deleted.length; index++)
272                         deleted[index]= characters[index];
273
274                 next= characters[characters.length - 1];
275                 offset= deleted.length;
276
277                 while (true) {
278
279                         neighborhood.add(fHashProvider.getHash(new String(characters)));
280                         if (offset == 0)
281                                 break;
282
283                         previous= next;
284                         next= deleted[offset - 1];
285
286                         deleted[offset - 1]= previous;
287                         --offset;
288                 }
289
290                 neighborhood.remove(hash);
291                 final HashSet matches= getCandidates(word, sentence, neighborhood);
292
293                 if (matches.size() == 0 && candidates.size() == 0)
294                         getCandidates(word, sentence, candidates);
295
296                 candidates.addAll(matches);
297
298                 return candidates;
299         }
300
301         /**
302          * Returns the URL of the dictionary word list.
303          * 
304          * @throws MalformedURLException
305          *                    if the URL could not be retrieved
306          * @return The URL of the dictionary word list
307          */
308         protected abstract URL getURL() throws MalformedURLException;
309
310         /**
311          * Hashes the word into the dictionary.
312          * 
313          * @param word
314          *                   The word to hash in the dictionary
315          */
316         protected final void hashWord(final String word) {
317
318                 final String hash= fHashProvider.getHash(word);
319                 ArrayList bucket= (ArrayList)fHashBuckets.get(hash);
320
321                 if (bucket == null) {
322
323                         bucket= new ArrayList(BUCKET_CAPACITY);
324                         fHashBuckets.put(hash, bucket);
325                 }
326
327                 bucket.add(word);
328         }
329
330         /*
331          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
332          */
333         public boolean isCorrect(final String word) {
334
335                 try {
336
337                         if (!fLoaded)
338                                 load(getURL());
339
340                 } catch (MalformedURLException exception) {
341                         // Do nothing
342                 }
343
344                 final ArrayList candidates= getCandidates(fHashProvider.getHash(word));
345
346                 if (candidates.contains(word) || candidates.contains(word.toLowerCase()))
347                         return true;
348
349                 return false;
350         }
351
352         /*
353          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
354          */
355         public final synchronized boolean isLoaded() {
356                 return fLoaded || fHashBuckets.size() > 0;
357         }
358
359         /**
360          * Loads a dictionary word list from disk.
361          * 
362          * @param url
363          *                   The URL of the word list to load
364          * @return <code>true</code> iff the word list could be loaded, <code>false</code>
365          *               otherwise
366          */
367         protected synchronized boolean load(final URL url) {
368
369                 if (url != null) {
370
371                         try {
372
373                                 final InputStream stream= url.openStream();
374                                 if (stream != null) {
375
376                                         String word= null;
377
378                                         final BufferedReader reader= new BufferedReader(new InputStreamReader(stream));
379                                         while ((word= reader.readLine()) != null)
380                                                 hashWord(word);
381
382                                         return fLoaded= true;
383                                 }
384                         } catch (IOException exception) {
385                                 // Do nothing
386                         }
387                 }
388                 return false;
389         }
390
391         /**
392          * Sets the phonetic distance algorithm to use.
393          * 
394          * @param algorithm
395          *                   The phonetic distance algorithm
396          */
397         protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
398                 fDistanceAlgorithm= algorithm;
399         }
400
401         /**
402          * Sets the phonetic hash provider to use.
403          * 
404          * @param provider
405          *                   The phonetic hash provider
406          */
407         protected final void setHashProvider(final IPhoneticHashProvider provider) {
408                 fHashProvider= provider;
409         }
410
411         /*
412          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#unload()
413          */
414         public synchronized void unload() {
415
416                 fLoaded= false;
417                 fHashBuckets.clear();
418         }
419         
420         /*
421          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
422          */
423         public boolean acceptsWords() {
424                 return false;
425         }
426         
427         /*
428          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
429          */
430         public void addWord(final String word) {
431                 // Do nothing
432         }
433 }