8e0bab58394aa7d68d737c9eee13c6fc56df1933
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / text / spelling / engine / AbstractSpellDictionary.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11
12 package net.sourceforge.phpdt.internal.ui.text.spelling.engine;
13
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.InputStreamReader;
18 import java.net.MalformedURLException;
19 import java.net.URL;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Set;
26
27 /**
28  * Partial implementation of a spell dictionary.
29  * 
30  * @since 3.0
31  */
32 public abstract class AbstractSpellDictionary implements ISpellDictionary {
33
34         /** The bucket capacity */
35         protected static final int BUCKET_CAPACITY = 4;
36
37         /** The word buffer capacity */
38         protected static final int BUFFER_CAPACITY = 32;
39
40         /** The distance threshold */
41         protected static final int DISTANCE_THRESHOLD = 160;
42
43         /** The hash capacity */
44         protected static final int HASH_CAPACITY = 22 * 1024;
45
46         /** The phonetic distance algorithm */
47         private IPhoneticDistanceAlgorithm fDistanceAlgorithm = new DefaultPhoneticDistanceAlgorithm();
48
49         /** The mapping from phonetic hashes to word lists */
50         private final Map fHashBuckets = new HashMap(HASH_CAPACITY);
51
52         /** The phonetic hash provider */
53         private IPhoneticHashProvider fHashProvider = new DefaultPhoneticHashProvider();
54
55         /** Is the dictionary already loaded? */
56         private boolean fLoaded = false;
57
58         /**
59          * Returns all candidates with the same phonetic hash.
60          * 
61          * @param hash
62          *            The hash to retrieve the candidates of
63          * @return Array of candidates for the phonetic hash
64          */
65         protected final ArrayList getCandidates(final String hash) {
66
67                 ArrayList list = (ArrayList) fHashBuckets.get(hash);
68                 if (list == null)
69                         list = new ArrayList(0);
70
71                 return list;
72         }
73
74         /**
75          * Returns all candidates that have a phonetic hash within a bounded
76          * distance to the specified word.
77          * 
78          * @param word
79          *            The word to find the nearest matches for
80          * @param sentence
81          *            <code>true</code> iff the proposals start a new sentence,
82          *            <code>false</code> otherwise
83          * @param hashs
84          *            Array of close hashes to find the matches
85          * @return Set of ranked words with bounded distance to the specified word
86          */
87         protected final HashSet getCandidates(final String word,
88                         final boolean sentence, final ArrayList hashs) {
89
90                 int distance = 0;
91                 String hash = null;
92
93                 String candidate = null;
94                 List candidates = null;
95
96                 final StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY);
97                 final HashSet result = new HashSet(BUCKET_CAPACITY * hashs.size());
98
99                 for (int index = 0; index < hashs.size(); index++) {
100
101                         hash = (String) hashs.get(index);
102                         candidates = getCandidates(hash);
103
104                         for (int offset = 0; offset < candidates.size(); offset++) {
105
106                                 candidate = (String) candidates.get(offset);
107                                 distance = fDistanceAlgorithm.getDistance(word, candidate);
108
109                                 if (distance < DISTANCE_THRESHOLD) {
110
111                                         buffer.setLength(0);
112                                         buffer.append(candidate);
113
114                                         if (sentence)
115                                                 buffer.setCharAt(0, Character.toUpperCase(buffer
116                                                                 .charAt(0)));
117
118                                         result.add(new RankedWordProposal(buffer.toString(),
119                                                         -distance));
120                                 }
121                         }
122                 }
123                 return result;
124         }
125
126         /**
127          * Returns all approximations that have a phonetic hash with smallest
128          * possible distance to the specified word.
129          * 
130          * @param word
131          *            The word to find the nearest matches for
132          * @param sentence
133          *            <code>true</code> iff the proposals start a new sentence,
134          *            <code>false</code> otherwise
135          * @param result
136          *            Set of ranked words with smallest possible distance to the
137          *            specified word
138          */
139         protected final void getCandidates(final String word,
140                         final boolean sentence, final HashSet result) {
141
142                 int distance = 0;
143                 int minimum = Integer.MAX_VALUE;
144
145                 String candidate = null;
146                 StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY);
147
148                 final ArrayList candidates = getCandidates(fHashProvider.getHash(word));
149                 final ArrayList matches = new ArrayList(candidates.size());
150
151                 for (int index = 0; index < candidates.size(); index++) {
152
153                         candidate = (String) candidates.get(index);
154                         distance = fDistanceAlgorithm.getDistance(word, candidate);
155
156                         if (distance <= minimum) {
157
158                                 buffer.setLength(0);
159                                 buffer.append(candidate);
160
161                                 if (sentence)
162                                         buffer
163                                                         .setCharAt(0, Character.toUpperCase(buffer
164                                                                         .charAt(0)));
165
166                                 matches
167                                                 .add(new RankedWordProposal(buffer.toString(),
168                                                                 -distance));
169                                 minimum = distance;
170                         }
171                 }
172
173                 RankedWordProposal match = null;
174
175                 for (int index = 0; index < matches.size(); index++) {
176
177                         match = (RankedWordProposal) matches.get(index);
178                         if (match.getRank() == minimum)
179                                 result.add(match);
180                 }
181         }
182
183         /**
184          * Returns the used phonetic distance algorithm.
185          * 
186          * @return The phonetic distance algorithm
187          */
188         protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
189                 return fDistanceAlgorithm;
190         }
191
192         /**
193          * Returns the used phonetic hash provider.
194          * 
195          * @return The phonetic hash provider
196          */
197         protected final IPhoneticHashProvider getHashProvider() {
198                 return fHashProvider;
199         }
200
201         /*
202          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
203          */
204         public Set getProposals(final String word, final boolean sentence) {
205
206                 try {
207
208                         if (!fLoaded)
209                                 load(getURL());
210
211                 } catch (MalformedURLException exception) {
212                         // Do nothing
213                 }
214
215                 final String hash = fHashProvider.getHash(word);
216                 final char[] mutators = fHashProvider.getMutators();
217
218                 final ArrayList neighborhood = new ArrayList((word.length() + 1)
219                                 * (mutators.length + 2));
220                 neighborhood.add(hash);
221
222                 final HashSet candidates = getCandidates(word, sentence, neighborhood);
223                 neighborhood.clear();
224
225                 char previous = 0;
226                 char next = 0;
227
228                 char[] characters = word.toCharArray();
229                 for (int index = 0; index < word.length() - 1; index++) {
230
231                         next = characters[index];
232                         previous = characters[index + 1];
233
234                         characters[index] = previous;
235                         characters[index + 1] = next;
236
237                         neighborhood.add(fHashProvider.getHash(new String(characters)));
238
239                         characters[index] = next;
240                         characters[index + 1] = previous;
241                 }
242
243                 final String sentinel = word + " "; //$NON-NLS-1$
244
245                 characters = sentinel.toCharArray();
246                 int offset = characters.length - 1;
247
248                 while (true) {
249
250                         for (int index = 0; index < mutators.length; index++) {
251
252                                 characters[offset] = mutators[index];
253                                 neighborhood.add(fHashProvider.getHash(new String(characters)));
254                         }
255
256                         if (offset == 0)
257                                 break;
258
259                         characters[offset] = characters[offset - 1];
260                         --offset;
261                 }
262
263                 char mutated = 0;
264                 characters = word.toCharArray();
265
266                 for (int index = 0; index < word.length(); index++) {
267
268                         mutated = characters[index];
269                         for (int mutator = 0; mutator < mutators.length; mutator++) {
270
271                                 characters[index] = mutators[mutator];
272                                 neighborhood.add(fHashProvider.getHash(new String(characters)));
273                         }
274                         characters[index] = mutated;
275                 }
276
277                 characters = word.toCharArray();
278                 final char[] deleted = new char[characters.length - 1];
279
280                 for (int index = 0; index < deleted.length; index++)
281                         deleted[index] = characters[index];
282
283                 next = characters[characters.length - 1];
284                 offset = deleted.length;
285
286                 while (true) {
287
288                         neighborhood.add(fHashProvider.getHash(new String(characters)));
289                         if (offset == 0)
290                                 break;
291
292                         previous = next;
293                         next = deleted[offset - 1];
294
295                         deleted[offset - 1] = previous;
296                         --offset;
297                 }
298
299                 neighborhood.remove(hash);
300                 final HashSet matches = getCandidates(word, sentence, neighborhood);
301
302                 if (matches.size() == 0 && candidates.size() == 0)
303                         getCandidates(word, sentence, candidates);
304
305                 candidates.addAll(matches);
306
307                 return candidates;
308         }
309
310         /**
311          * Returns the URL of the dictionary word list.
312          * 
313          * @throws MalformedURLException
314          *             if the URL could not be retrieved
315          * @return The URL of the dictionary word list
316          */
317         protected abstract URL getURL() throws MalformedURLException;
318
319         /**
320          * Hashes the word into the dictionary.
321          * 
322          * @param word
323          *            The word to hash in the dictionary
324          */
325         protected final void hashWord(final String word) {
326
327                 final String hash = fHashProvider.getHash(word);
328                 ArrayList bucket = (ArrayList) fHashBuckets.get(hash);
329
330                 if (bucket == null) {
331
332                         bucket = new ArrayList(BUCKET_CAPACITY);
333                         fHashBuckets.put(hash, bucket);
334                 }
335
336                 bucket.add(word);
337         }
338
339         /*
340          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
341          */
342         public boolean isCorrect(final String word) {
343
344                 try {
345
346                         if (!fLoaded)
347                                 load(getURL());
348
349                 } catch (MalformedURLException exception) {
350                         // Do nothing
351                 }
352
353                 final ArrayList candidates = getCandidates(fHashProvider.getHash(word));
354
355                 if (candidates.contains(word)
356                                 || candidates.contains(word.toLowerCase()))
357                         return true;
358
359                 return false;
360         }
361
362         /*
363          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
364          */
365         public final synchronized boolean isLoaded() {
366                 return fLoaded || fHashBuckets.size() > 0;
367         }
368
369         /**
370          * Loads a dictionary word list from disk.
371          * 
372          * @param url
373          *            The URL of the word list to load
374          * @return <code>true</code> iff the word list could be loaded,
375          *         <code>false</code> otherwise
376          */
377         protected synchronized boolean load(final URL url) {
378
379                 if (url != null) {
380
381                         try {
382
383                                 final InputStream stream = url.openStream();
384                                 if (stream != null) {
385
386                                         String word = null;
387
388                                         final BufferedReader reader = new BufferedReader(
389                                                         new InputStreamReader(stream));
390                                         while ((word = reader.readLine()) != null)
391                                                 hashWord(word);
392
393                                         return fLoaded = true;
394                                 }
395                         } catch (IOException exception) {
396                                 // Do nothing
397                         }
398                 }
399                 return false;
400         }
401
402         /**
403          * Sets the phonetic distance algorithm to use.
404          * 
405          * @param algorithm
406          *            The phonetic distance algorithm
407          */
408         protected final void setDistanceAlgorithm(
409                         final IPhoneticDistanceAlgorithm algorithm) {
410                 fDistanceAlgorithm = algorithm;
411         }
412
413         /**
414          * Sets the phonetic hash provider to use.
415          * 
416          * @param provider
417          *            The phonetic hash provider
418          */
419         protected final void setHashProvider(final IPhoneticHashProvider provider) {
420                 fHashProvider = provider;
421         }
422
423         /*
424          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#unload()
425          */
426         public synchronized void unload() {
427
428                 fLoaded = false;
429                 fHashBuckets.clear();
430         }
431
432         /*
433          * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
434          */
435         public boolean acceptsWords() {
436                 return false;
437         }
438
439         /*
440          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
441          */
442         public void addWord(final String word) {
443                 // Do nothing
444         }
445 }