/******************************************************************************* * Copyright (c) 2000, 2003 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Common Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/cpl-v10.html * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ package net.sourceforge.phpdt.internal.ui.text.spelling.engine; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * Partial implementation of a spell dictionary. * * @since 3.0 */ public abstract class AbstractSpellDictionary implements ISpellDictionary { /** The bucket capacity */ protected static final int BUCKET_CAPACITY = 4; /** The word buffer capacity */ protected static final int BUFFER_CAPACITY = 32; /** The distance threshold */ protected static final int DISTANCE_THRESHOLD = 160; /** The hash capacity */ protected static final int HASH_CAPACITY = 22 * 1024; /** The phonetic distance algorithm */ private IPhoneticDistanceAlgorithm fDistanceAlgorithm = new DefaultPhoneticDistanceAlgorithm(); /** The mapping from phonetic hashes to word lists */ private final Map fHashBuckets = new HashMap(HASH_CAPACITY); /** The phonetic hash provider */ private IPhoneticHashProvider fHashProvider = new DefaultPhoneticHashProvider(); /** Is the dictionary already loaded? */ private boolean fLoaded = false; /** * Returns all candidates with the same phonetic hash. * * @param hash * The hash to retrieve the candidates of * @return Array of candidates for the phonetic hash */ protected final ArrayList getCandidates(final String hash) { ArrayList list = (ArrayList) fHashBuckets.get(hash); if (list == null) list = new ArrayList(0); return list; } /** * Returns all candidates that have a phonetic hash within a bounded * distance to the specified word. * * @param word * The word to find the nearest matches for * @param sentence * true iff the proposals start a new sentence, * false otherwise * @param hashs * Array of close hashes to find the matches * @return Set of ranked words with bounded distance to the specified word */ protected final HashSet getCandidates(final String word, final boolean sentence, final ArrayList hashs) { int distance = 0; String hash = null; String candidate = null; List candidates = null; final StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY); final HashSet result = new HashSet(BUCKET_CAPACITY * hashs.size()); for (int index = 0; index < hashs.size(); index++) { hash = (String) hashs.get(index); candidates = getCandidates(hash); for (int offset = 0; offset < candidates.size(); offset++) { candidate = (String) candidates.get(offset); distance = fDistanceAlgorithm.getDistance(word, candidate); if (distance < DISTANCE_THRESHOLD) { buffer.setLength(0); buffer.append(candidate); if (sentence) buffer.setCharAt(0, Character.toUpperCase(buffer .charAt(0))); result.add(new RankedWordProposal(buffer.toString(), -distance)); } } } return result; } /** * Returns all approximations that have a phonetic hash with smallest * possible distance to the specified word. * * @param word * The word to find the nearest matches for * @param sentence * true iff the proposals start a new sentence, * false otherwise * @param result * Set of ranked words with smallest possible distance to the * specified word */ protected final void getCandidates(final String word, final boolean sentence, final HashSet result) { int distance = 0; int minimum = Integer.MAX_VALUE; String candidate = null; StringBuffer buffer = new StringBuffer(BUFFER_CAPACITY); final ArrayList candidates = getCandidates(fHashProvider.getHash(word)); final ArrayList matches = new ArrayList(candidates.size()); for (int index = 0; index < candidates.size(); index++) { candidate = (String) candidates.get(index); distance = fDistanceAlgorithm.getDistance(word, candidate); if (distance <= minimum) { buffer.setLength(0); buffer.append(candidate); if (sentence) buffer .setCharAt(0, Character.toUpperCase(buffer .charAt(0))); matches .add(new RankedWordProposal(buffer.toString(), -distance)); minimum = distance; } } RankedWordProposal match = null; for (int index = 0; index < matches.size(); index++) { match = (RankedWordProposal) matches.get(index); if (match.getRank() == minimum) result.add(match); } } /** * Returns the used phonetic distance algorithm. * * @return The phonetic distance algorithm */ protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() { return fDistanceAlgorithm; } /** * Returns the used phonetic hash provider. * * @return The phonetic hash provider */ protected final IPhoneticHashProvider getHashProvider() { return fHashProvider; } /* * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean) */ public Set getProposals(final String word, final boolean sentence) { try { if (!fLoaded) load(getURL()); } catch (MalformedURLException exception) { // Do nothing } final String hash = fHashProvider.getHash(word); final char[] mutators = fHashProvider.getMutators(); final ArrayList neighborhood = new ArrayList((word.length() + 1) * (mutators.length + 2)); neighborhood.add(hash); final HashSet candidates = getCandidates(word, sentence, neighborhood); neighborhood.clear(); char previous = 0; char next = 0; char[] characters = word.toCharArray(); for (int index = 0; index < word.length() - 1; index++) { next = characters[index]; previous = characters[index + 1]; characters[index] = previous; characters[index + 1] = next; neighborhood.add(fHashProvider.getHash(new String(characters))); characters[index] = next; characters[index + 1] = previous; } final String sentinel = word + " "; //$NON-NLS-1$ characters = sentinel.toCharArray(); int offset = characters.length - 1; while (true) { for (int index = 0; index < mutators.length; index++) { characters[offset] = mutators[index]; neighborhood.add(fHashProvider.getHash(new String(characters))); } if (offset == 0) break; characters[offset] = characters[offset - 1]; --offset; } char mutated = 0; characters = word.toCharArray(); for (int index = 0; index < word.length(); index++) { mutated = characters[index]; for (int mutator = 0; mutator < mutators.length; mutator++) { characters[index] = mutators[mutator]; neighborhood.add(fHashProvider.getHash(new String(characters))); } characters[index] = mutated; } characters = word.toCharArray(); final char[] deleted = new char[characters.length - 1]; for (int index = 0; index < deleted.length; index++) deleted[index] = characters[index]; next = characters[characters.length - 1]; offset = deleted.length; while (true) { neighborhood.add(fHashProvider.getHash(new String(characters))); if (offset == 0) break; previous = next; next = deleted[offset - 1]; deleted[offset - 1] = previous; --offset; } neighborhood.remove(hash); final HashSet matches = getCandidates(word, sentence, neighborhood); if (matches.size() == 0 && candidates.size() == 0) getCandidates(word, sentence, candidates); candidates.addAll(matches); return candidates; } /** * Returns the URL of the dictionary word list. * * @throws MalformedURLException * if the URL could not be retrieved * @return The URL of the dictionary word list */ protected abstract URL getURL() throws MalformedURLException; /** * Hashes the word into the dictionary. * * @param word * The word to hash in the dictionary */ protected final void hashWord(final String word) { final String hash = fHashProvider.getHash(word); ArrayList bucket = (ArrayList) fHashBuckets.get(hash); if (bucket == null) { bucket = new ArrayList(BUCKET_CAPACITY); fHashBuckets.put(hash, bucket); } bucket.add(word); } /* * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String) */ public boolean isCorrect(final String word) { try { if (!fLoaded) load(getURL()); } catch (MalformedURLException exception) { // Do nothing } final ArrayList candidates = getCandidates(fHashProvider.getHash(word)); if (candidates.contains(word) || candidates.contains(word.toLowerCase())) return true; return false; } /* * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#isLoaded() */ public final synchronized boolean isLoaded() { return fLoaded || fHashBuckets.size() > 0; } /** * Loads a dictionary word list from disk. * * @param url * The URL of the word list to load * @return true iff the word list could be loaded, * false otherwise */ protected synchronized boolean load(final URL url) { if (url != null) { try { final InputStream stream = url.openStream(); if (stream != null) { String word = null; final BufferedReader reader = new BufferedReader( new InputStreamReader(stream)); while ((word = reader.readLine()) != null) hashWord(word); return fLoaded = true; } } catch (IOException exception) { // Do nothing } } return false; } /** * Sets the phonetic distance algorithm to use. * * @param algorithm * The phonetic distance algorithm */ protected final void setDistanceAlgorithm( final IPhoneticDistanceAlgorithm algorithm) { fDistanceAlgorithm = algorithm; } /** * Sets the phonetic hash provider to use. * * @param provider * The phonetic hash provider */ protected final void setHashProvider(final IPhoneticHashProvider provider) { fHashProvider = provider; } /* * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#unload() */ public synchronized void unload() { fLoaded = false; fHashBuckets.clear(); } /* * @see net.sourceforge.phpdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords() */ public boolean acceptsWords() { return false; } /* * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String) */ public void addWord(final String word) { // Do nothing } }