1 /*******************************************************************************
2 * Copyright (c) 2000, 2003 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
12 package net.sourceforge.phpdt.internal.ui.text.spelling.engine;
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.io.InputStream;
17 import java.io.InputStreamReader;
18 import java.net.MalformedURLException;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.List;
28 * Partial implementation of a spell dictionary.
32 public abstract class AbstractSpellDictionary implements ISpellDictionary {
34 /** The bucket capacity */
35 protected static final int BUCKET_CAPACITY = 4;
37 /** The word buffer capacity */
38 protected static final int BUFFER_CAPACITY = 32;
40 /** The distance threshold */
41 protected static final int DISTANCE_THRESHOLD = 160;
43 /** The hash capacity */
44 protected static final int HASH_CAPACITY = 22 * 1024;
46 /** The phonetic distance algorithm */
47 private IPhoneticDistanceAlgorithm fDistanceAlgorithm = new DefaultPhoneticDistanceAlgorithm();
49 /** The mapping from phonetic hashes to word lists */
50 private final Map fHashBuckets = new HashMap(HASH_CAPACITY);
52 /** The phonetic hash provider */
53 private IPhoneticHashProvider fHashProvider = new DefaultPhoneticHashProvider();
55 /** Is the dictionary already loaded? */
56 private boolean fLoaded = false;
59 * Returns all candidates with the same phonetic hash.
62 * The hash to retrieve the candidates of
63 * @return Array of candidates for the phonetic hash
65 protected final ArrayList getCandidates(final String hash) {
67 ArrayList list = (ArrayList) fHashBuckets.get(hash);
69 list = new ArrayList(0);
75 * Returns all candidates that have a phonetic hash within a bounded
76 * distance to the specified word.
79 * The word to find the nearest matches for
81 * <code>true</code> iff the proposals start a new sentence,
82 * <code>false</code> otherwise
84 * Array of close hashes to find the matches
85 * @return Set of ranked words with bounded distance to the specified word
87 protected final HashSet getCandidates(final String word,
88 final boolean sentence, final ArrayList hashs) {
93 String candidate = null;
94 List candidates = null;
96 final StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY);
97 final HashSet result = new HashSet(BUCKET_CAPACITY * hashs.size());
99 for (int index = 0; index < hashs.size(); index++) {
101 hash = (String) hashs.get(index);
102 candidates = getCandidates(hash);
104 for (int offset = 0; offset < candidates.size(); offset++) {
106 candidate = (String) candidates.get(offset);
107 distance = fDistanceAlgorithm.getDistance(word, candidate);
109 if (distance < DISTANCE_THRESHOLD) {
112 buffer.append(candidate);
115 buffer.setCharAt(0, Character.toUpperCase(buffer
118 result.add(new RankedWordProposal(buffer.toString(),
127 * Returns all approximations that have a phonetic hash with smallest
128 * possible distance to the specified word.
131 * The word to find the nearest matches for
133 * <code>true</code> iff the proposals start a new sentence,
134 * <code>false</code> otherwise
136 * Set of ranked words with smallest possible distance to the
139 protected final void getCandidates(final String word,
140 final boolean sentence, final HashSet result) {
143 int minimum = Integer.MAX_VALUE;
145 String candidate = null;
146 StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY);
148 final ArrayList candidates = getCandidates(fHashProvider.getHash(word));
149 final ArrayList matches = new ArrayList(candidates.size());
151 for (int index = 0; index < candidates.size(); index++) {
153 candidate = (String) candidates.get(index);
154 distance = fDistanceAlgorithm.getDistance(word, candidate);
156 if (distance <= minimum) {
159 buffer.append(candidate);
163 .setCharAt(0, Character.toUpperCase(buffer
167 .add(new RankedWordProposal(buffer.toString(),
173 RankedWordProposal match = null;
175 for (int index = 0; index < matches.size(); index++) {
177 match = (RankedWordProposal) matches.get(index);
178 if (match.getRank() == minimum)
184 * Returns the used phonetic distance algorithm.
186 * @return The phonetic distance algorithm
188 protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
189 return fDistanceAlgorithm;
193 * Returns the used phonetic hash provider.
195 * @return The phonetic hash provider
197 protected final IPhoneticHashProvider getHashProvider() {
198 return fHashProvider;
202 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
204 public Set getProposals(final String word, final boolean sentence) {
211 } catch (MalformedURLException exception) {
215 final String hash = fHashProvider.getHash(word);
216 final char[] mutators = fHashProvider.getMutators();
218 final ArrayList neighborhood = new ArrayList((word.length() + 1)
219 * (mutators.length + 2));
220 neighborhood.add(hash);
222 final HashSet candidates = getCandidates(word, sentence, neighborhood);
223 neighborhood.clear();
228 char[] characters = word.toCharArray();
229 for (int index = 0; index < word.length() - 1; index++) {
231 next = characters[index];
232 previous = characters[index + 1];
234 characters[index] = previous;
235 characters[index + 1] = next;
237 neighborhood.add(fHashProvider.getHash(new String(characters)));
239 characters[index] = next;
240 characters[index + 1] = previous;
243 final String sentinel = word + " "; //$NON-NLS-1$
245 characters = sentinel.toCharArray();
246 int offset = characters.length - 1;
250 for (int index = 0; index < mutators.length; index++) {
252 characters[offset] = mutators[index];
253 neighborhood.add(fHashProvider.getHash(new String(characters)));
259 characters[offset] = characters[offset - 1];
264 characters = word.toCharArray();
266 for (int index = 0; index < word.length(); index++) {
268 mutated = characters[index];
269 for (int mutator = 0; mutator < mutators.length; mutator++) {
271 characters[index] = mutators[mutator];
272 neighborhood.add(fHashProvider.getHash(new String(characters)));
274 characters[index] = mutated;
277 characters = word.toCharArray();
278 final char[] deleted = new char[characters.length - 1];
280 for (int index = 0; index < deleted.length; index++)
281 deleted[index] = characters[index];
283 next = characters[characters.length - 1];
284 offset = deleted.length;
288 neighborhood.add(fHashProvider.getHash(new String(characters)));
293 next = deleted[offset - 1];
295 deleted[offset - 1] = previous;
299 neighborhood.remove(hash);
300 final HashSet matches = getCandidates(word, sentence, neighborhood);
302 if (matches.size() == 0 && candidates.size() == 0)
303 getCandidates(word, sentence, candidates);
305 candidates.addAll(matches);
311 * Returns the URL of the dictionary word list.
313 * @throws MalformedURLException
314 * if the URL could not be retrieved
315 * @return The URL of the dictionary word list
317 protected abstract URL getURL() throws MalformedURLException;
320 * Hashes the word into the dictionary.
323 * The word to hash in the dictionary
325 protected final void hashWord(final String word) {
327 final String hash = fHashProvider.getHash(word);
328 ArrayList bucket = (ArrayList) fHashBuckets.get(hash);
330 if (bucket == null) {
332 bucket = new ArrayList(BUCKET_CAPACITY);
333 fHashBuckets.put(hash, bucket);
340 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
342 public boolean isCorrect(final String word) {
349 } catch (MalformedURLException exception) {
353 final ArrayList candidates = getCandidates(fHashProvider.getHash(word));
355 if (candidates.contains(word)
356 || candidates.contains(word.toLowerCase()))
363 * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
365 public final synchronized boolean isLoaded() {
366 return fLoaded || fHashBuckets.size() > 0;
370 * Loads a dictionary word list from disk.
373 * The URL of the word list to load
374 * @return <code>true</code> iff the word list could be loaded,
375 * <code>false</code> otherwise
377 protected synchronized boolean load(final URL url) {
383 final InputStream stream = url.openStream();
384 if (stream != null) {
388 final BufferedReader reader = new BufferedReader(
389 new InputStreamReader(stream));
390 while ((word = reader.readLine()) != null)
393 return fLoaded = true;
395 } catch (IOException exception) {
403 * Sets the phonetic distance algorithm to use.
406 * The phonetic distance algorithm
408 protected final void setDistanceAlgorithm(
409 final IPhoneticDistanceAlgorithm algorithm) {
410 fDistanceAlgorithm = algorithm;
414 * Sets the phonetic hash provider to use.
417 * The phonetic hash provider
419 protected final void setHashProvider(final IPhoneticHashProvider provider) {
420 fHashProvider = provider;
424 * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#unload()
426 public synchronized void unload() {
429 fHashBuckets.clear();
433 * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
435 public boolean acceptsWords() {
440 * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
442 public void addWord(final String word) {