2229bcf3fb4a3fb5596170cea7ec0c8b362bafe3
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / text / spelling / engine / DefaultPhoneticHashProvider.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2003 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11
12 package net.sourceforge.phpdt.internal.ui.text.spelling.engine;
13
14 /**
 * Default phonetic hash provider for the English language.
 * <p>
 * This algorithm uses an adapted version of the double metaphone algorithm by
 * Lawrence Philips.
19  * <p>
20  * 
21  * @since 3.0
22  */
23 public final class DefaultPhoneticHashProvider implements IPhoneticHashProvider {
24
        /*
         * Candidate tables for the hasOneOf(...) checks made by getHash(String).
         *
         * Every table except meta95 ends with an empty string. For the
         * fixed-range overload hasOneOf(String[], char[], int, int), which tests
         * for exact equality with the range, an empty entry can only equal a
         * zero-length range, so it cannot match any non-empty range.
         *
         * Several tables have identical contents (for example meta10, meta46 and
         * meta85, or meta33 and meta34).
         */
        private static final String[] meta01 = { "ACH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta02 = { "BACHER", "MACHER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta03 = { "CAESAR", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta04 = { "CHIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta05 = { "CH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta06 = { "CHAE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta07 = { "HARAC", "HARIS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta08 = { "HOR", "HYM", "HIA", "HEM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$

        private static final String[] meta09 = { "CHORE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta10 = { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta11 = { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta12 = { "ORCHES", "ARCHIT", "ORCHID", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta13 = { "T", "S", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta14 = { "A", "O", "U", "E", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$

        private static final String[] meta15 = {
                        "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$ //$NON-NLS-10$ //$NON-NLS-11$

        private static final String[] meta16 = { "MC", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta17 = { "CZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta18 = { "WICZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta19 = { "CIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta20 = { "CC", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta21 = { "I", "E", "H", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta22 = { "HU", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta23 = { "UCCEE", "UCCES", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta24 = { "CK", "CG", "CQ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta25 = { "CI", "CE", "CY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta26 = { "GN", "KN", "PN", "WR", "PS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$

        private static final String[] meta27 = { " C", " Q", " G", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta28 = { "C", "K", "Q", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta29 = { "CE", "CI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta30 = { "DG", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta31 = { "I", "E", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta32 = { "DT", "DD", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta33 = { "B", "H", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta34 = { "B", "H", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta35 = { "B", "H", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta36 = { "C", "G", "L", "R", "T", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$

        private static final String[] meta37 = { "EY", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta38 = { "LI", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta39 = {
                        "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$ //$NON-NLS-10$ //$NON-NLS-11$ //$NON-NLS-12$

        private static final String[] meta40 = { "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta41 = { "DANGER", "RANGER", "MANGER", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta42 = { "E", "I", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta43 = { "RGY", "OGY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta44 = { "E", "I", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta45 = { "AGGI", "OGGI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta46 = { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta47 = { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta48 = { "ET", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta49 = { "C", "X", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta50 = { "JOSE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta51 = { "SAN ", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta52 = { "SAN ", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta53 = { "JOSE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta54 = {
                        "L", "T", "K", "S", "N", "M", "B", "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$ //$NON-NLS-8$ //$NON-NLS-9$

        private static final String[] meta55 = { "S", "K", "L", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta56 = { "ILLO", "ILLA", "ALLE", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta57 = { "AS", "OS", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta58 = { "A", "O", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta59 = { "ALLE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta60 = { "UMB", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta61 = { "ER", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta62 = { "P", "B", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta63 = { "IE", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta64 = { "ME", "MA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta65 = { "ISL", "YSL", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta66 = { "SUGAR", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta67 = { "SH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta68 = { "HEIM", "HOEK", "HOLM", "HOLZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$

        private static final String[] meta69 = { "SIO", "SIA", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta70 = { "SIAN", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta71 = { "M", "N", "L", "W", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$

        private static final String[] meta72 = { "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta73 = { "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta74 = { "SC", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta75 = {
                        "OO", "ER", "EN", "UY", "ED", "EM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ //$NON-NLS-7$

        private static final String[] meta76 = { "ER", "EN", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta77 = { "I", "E", "Y", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$

        private static final String[] meta78 = { "AI", "OI", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta79 = { "S", "Z", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta80 = { "TION", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta81 = { "TIA", "TCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta82 = { "TH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta83 = { "TTH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta84 = { "OM", "AM", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta85 = { "VAN ", "VON ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta86 = { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta87 = { "T", "D", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta88 = { "WR", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta89 = { "WH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta90 = {
                        "EWSKI", "EWSKY", "OWSKI", "OWSKY", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$

        private static final String[] meta91 = { "SCH", "" }; //$NON-NLS-1$ //$NON-NLS-2$

        private static final String[] meta92 = { "WICZ", "WITZ", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta93 = { "IAU", "EAU", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        private static final String[] meta94 = { "AU", "OU", "" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        /*
         * Unlike the other tables this one has no trailing empty string.
         * getHash(String) passes it to hasOneOf(String[], String), which searches
         * with String.indexOf; an empty entry would be found in every token and
         * make that check always true.
         */
        private static final String[] meta95 = { "W", "K", "CZ", "WITZ" }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
219
        /**
         * The mutator characters.
         * <p>
         * NOTE(review): not referenced in the part of this class reviewed here;
         * presumably the alphabet used when generating hash mutations -- confirm
         * against the remainder of the file.
         */
        private static final char[] MUTATOR_CHARACTERS = { 'A', 'B', 'X', 'S', 'K',
                        'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0' };

        /**
         * The vowel characters: the upper-case letters that
         * {@link #hasVowel(char[], int, int)} accepts as vowels. Note that
         * <code>'Y'</code> is included.
         */
        private static final char[] VOWEL_CHARACTERS = new char[] { 'A', 'E', 'I',
                        'O', 'U', 'Y' };
227
228         /**
229          * Test whether the specified string contains one of the candidates in the
230          * list.
231          * 
232          * @param candidates
233          *            Array of candidates to check
234          * @param token
235          *            The token to check for occurrences of the candidates
236          * @param offset
237          *            The offset where to begin checking in the string
238          * @param length
239          *            The length of the range in the string to check
240          * @return <code>true</code> iff the string contains one of the
241          *         candidates, <code>false</code> otherwise.
242          */
243         protected static final boolean hasOneOf(final String[] candidates,
244                         final char[] token, final int offset, final int length) {
245
246                 if (offset < 0 || offset >= token.length || candidates.length == 0)
247                         return false;
248
249                 final String checkable = new String(token, offset, length);
250                 for (int index = 0; index < candidates.length; index++) {
251
252                         if (candidates[index].equals(checkable))
253                                 return true;
254                 }
255                 return false;
256         }
257
258         /**
259          * Test whether the specified token contains one of the candidates in the
260          * list.
261          * 
262          * @param candidates
263          *            Array of candidates to check
264          * @param token
265          *            The token to check for occurrences of the candidates
266          * @return <code>true</code> iff the string contains one of the
267          *         candidates, <code>false</code> otherwise.
268          */
269         protected static final boolean hasOneOf(final String[] candidates,
270                         final String token) {
271
272                 for (int index = 0; index < candidates.length; index++) {
273
274                         if (token.indexOf(candidates[index]) >= 0)
275                                 return true;
276                 }
277                 return false;
278         }
279
280         /**
281          * Tests whether the specified token contains a vowel at the specified
282          * offset.
283          * 
284          * @param token
285          *            The token to check for a vowel
286          * @param offset
287          *            The offset where to begin checking in the token
288          * @param length
289          *            The length of the range in the token to check
290          * @return <code>true</code> iff the token contains a vowel,
291          *         <code>false</code> otherwise.
292          */
293         protected static final boolean hasVowel(final char[] token,
294                         final int offset, final int length) {
295
296                 if (offset >= 0 && offset < length) {
297
298                         final char character = token[offset];
299                         for (int index = 0; index < VOWEL_CHARACTERS.length; index++) {
300
301                                 if (VOWEL_CHARACTERS[index] == character)
302                                         return true;
303                         }
304                 }
305                 return false;
306         }
307
308         /*
309          * @see org.eclipse.spelling.done.IPhoneticHasher#getHash(java.lang.String)
310          */
311         public final String getHash(final String word) {
312
313                 final String input = word.toUpperCase() + "     "; //$NON-NLS-1$
314                 final char[] hashable = input.toCharArray();
315
316                 final boolean has95 = hasOneOf(meta95, input);
317                 final StringBuffer buffer = new StringBuffer(hashable.length);
318
319                 int offset = 0;
320                 if (hasOneOf(meta26, hashable, 0, 2))
321                         offset += 1;
322
323                 if (hashable[0] == 'X') {
324                         buffer.append('S');
325                         offset += 1;
326                 }
327
328                 while (offset < hashable.length) {
329
330                         switch (hashable[offset]) {
331                         case 'A':
332                         case 'E':
333                         case 'I':
334                         case 'O':
335                         case 'U':
336                         case 'Y':
337                                 if (offset == 0)
338                                         buffer.append('A');
339                                 offset += 1;
340                                 break;
341                         case 'B':
342                                 buffer.append('P');
343                                 if (hashable[offset + 1] == 'B')
344                                         offset += 2;
345                                 else
346                                         offset += 1;
347                                 break;
348                         case 'C':
349                                 if ((offset > 1)
350                                                 && !hasVowel(hashable, offset - 2, hashable.length)
351                                                 && hasOneOf(meta01, hashable, (offset - 1), 3)
352                                                 && (hashable[offset + 2] != 'I')
353                                                 && (hashable[offset + 2] != 'E')
354                                                 || hasOneOf(meta02, hashable, (offset - 2), 6)) {
355                                         buffer.append('K');
356                                         offset += 2;
357                                         break;
358                                 }
359                                 if ((offset == 0) && hasOneOf(meta03, hashable, offset, 6)) {
360                                         buffer.append('S');
361                                         offset += 2;
362                                         break;
363                                 }
364                                 if (hasOneOf(meta04, hashable, offset, 4)) {
365                                         buffer.append('K');
366                                         offset += 2;
367                                         break;
368                                 }
369                                 if (hasOneOf(meta05, hashable, offset, 2)) {
370                                         if ((offset > 0) && hasOneOf(meta06, hashable, offset, 4)) {
371                                                 buffer.append('K');
372                                                 offset += 2;
373                                                 break;
374                                         }
375                                         if ((offset == 0)
376                                                         && hasOneOf(meta07, hashable, (offset + 1), 5)
377                                                         || hasOneOf(meta08, hashable, offset + 1, 3)
378                                                         && !hasOneOf(meta09, hashable, 0, 5)) {
379                                                 buffer.append('K');
380                                                 offset += 2;
381                                                 break;
382                                         }
383                                         if (hasOneOf(meta10, hashable, 0, 4)
384                                                         || hasOneOf(meta11, hashable, 0, 3)
385                                                         || hasOneOf(meta12, hashable, offset - 2, 6)
386                                                         || hasOneOf(meta13, hashable, offset + 2, 1)
387                                                         || (hasOneOf(meta14, hashable, offset - 1, 1) || (offset == 0))
388                                                         && hasOneOf(meta15, hashable, offset + 2, 1)) {
389                                                 buffer.append('K');
390                                         } else {
391                                                 if (offset > 0) {
392                                                         if (hasOneOf(meta16, hashable, 0, 2))
393                                                                 buffer.append('K');
394                                                         else
395                                                                 buffer.append('X');
396                                                 } else {
397                                                         buffer.append('X');
398                                                 }
399                                         }
400                                         offset += 2;
401                                         break;
402                                 }
403                                 if (hasOneOf(meta17, hashable, offset, 2)
404                                                 && !hasOneOf(meta18, hashable, offset, 4)) {
405                                         buffer.append('S');
406                                         offset += 2;
407                                         break;
408                                 }
409                                 if (hasOneOf(meta19, hashable, offset, 2)) {
410                                         buffer.append('X');
411                                         offset += 2;
412                                         break;
413                                 }
414                                 if (hasOneOf(meta20, hashable, offset, 2)
415                                                 && !((offset == 1) && hashable[0] == 'M')) {
416                                         if (hasOneOf(meta21, hashable, offset + 2, 1)
417                                                         && !hasOneOf(meta22, hashable, offset + 2, 2)) {
418                                                 if (((offset == 1) && (hashable[offset - 1] == 'A'))
419                                                                 || hasOneOf(meta23, hashable, (offset - 1), 5))
420                                                         buffer.append("KS"); //$NON-NLS-1$
421                                                 else
422                                                         buffer.append('X');
423                                                 offset += 3;
424                                                 break;
425                                         } else {
426                                                 buffer.append('K');
427                                                 offset += 2;
428                                                 break;
429                                         }
430                                 }
431                                 if (hasOneOf(meta24, hashable, offset, 2)) {
432                                         buffer.append('K');
433                                         offset += 2;
434                                         break;
435                                 } else if (hasOneOf(meta25, hashable, offset, 2)) {
436                                         buffer.append('S');
437                                         offset += 2;
438                                         break;
439                                 }
440                                 buffer.append('K');
441                                 if (hasOneOf(meta27, hashable, offset + 1, 2))
442                                         offset += 3;
443                                 else if (hasOneOf(meta28, hashable, offset + 1, 1)
444                                                 && !hasOneOf(meta29, hashable, offset + 1, 2))
445                                         offset += 2;
446                                 else
447                                         offset += 1;
448                                 break;
449                         case '\u00C7':
450                                 buffer.append('S');
451                                 offset += 1;
452                                 break;
453                         case 'D':
454                                 if (hasOneOf(meta30, hashable, offset, 2)) {
455                                         if (hasOneOf(meta31, hashable, offset + 2, 1)) {
456                                                 buffer.append('J');
457                                                 offset += 3;
458                                                 break;
459                                         } else {
460                                                 buffer.append("TK"); //$NON-NLS-1$
461                                                 offset += 2;
462                                                 break;
463                                         }
464                                 }
465                                 buffer.append('T');
466                                 if (hasOneOf(meta32, hashable, offset, 2)) {
467                                         offset += 2;
468                                 } else {
469                                         offset += 1;
470                                 }
471                                 break;
472                         case 'F':
473                                 if (hashable[offset + 1] == 'F')
474                                         offset += 2;
475                                 else
476                                         offset += 1;
477                                 buffer.append('F');
478                                 break;
479                         case 'G':
480                                 if (hashable[offset + 1] == 'H') {
481                                         if ((offset > 0)
482                                                         && !hasVowel(hashable, offset - 1, hashable.length)) {
483                                                 buffer.append('K');
484                                                 offset += 2;
485                                                 break;
486                                         }
487                                         if (offset < 3) {
488                                                 if (offset == 0) {
489                                                         if (hashable[offset + 2] == 'I')
490                                                                 buffer.append('J');
491                                                         else
492                                                                 buffer.append('K');
493                                                         offset += 2;
494                                                         break;
495                                                 }
496                                         }
497                                         if ((offset > 1)
498                                                         && hasOneOf(meta33, hashable, offset - 2, 1)
499                                                         || ((offset > 2) && hasOneOf(meta34, hashable,
500                                                                         offset - 3, 1))
501                                                         || ((offset > 3) && hasOneOf(meta35, hashable,
502                                                                         offset - 4, 1))) {
503                                                 offset += 2;
504                                                 break;
505                                         } else {
506                                                 if ((offset > 2) && (hashable[offset - 1] == 'U')
507                                                                 && hasOneOf(meta36, hashable, offset - 3, 1)) {
508                                                         buffer.append('F');
509                                                 } else {
510                                                         if ((offset > 0) && (hashable[offset - 1] != 'I'))
511                                                                 buffer.append('K');
512                                                 }
513                                                 offset += 2;
514                                                 break;
515                                         }
516                                 }
517                                 if (hashable[offset + 1] == 'N') {
518                                         if ((offset == 1) && hasVowel(hashable, 0, hashable.length)
519                                                         && !has95) {
520                                                 buffer.append("KN"); //$NON-NLS-1$
521                                         } else {
522                                                 if (!hasOneOf(meta37, hashable, offset + 2, 2)
523                                                                 && (hashable[offset + 1] != 'Y') && !has95) {
524                                                         buffer.append("N"); //$NON-NLS-1$
525                                                 } else {
526                                                         buffer.append("KN"); //$NON-NLS-1$
527                                                 }
528                                         }
529                                         offset += 2;
530                                         break;
531                                 }
532                                 if (hasOneOf(meta38, hashable, offset + 1, 2) && !has95) {
533                                         buffer.append("KL"); //$NON-NLS-1$
534                                         offset += 2;
535                                         break;
536                                 }
537                                 if ((offset == 0)
538                                                 && ((hashable[offset + 1] == 'Y') || hasOneOf(meta39,
539                                                                 hashable, offset + 1, 2))) {
540                                         buffer.append('K');
541                                         offset += 2;
542                                         break;
543                                 }
544                                 if ((hasOneOf(meta40, hashable, offset + 1, 2) || (hashable[offset + 1] == 'Y'))
545                                                 && !hasOneOf(meta41, hashable, 0, 6)
546                                                 && !hasOneOf(meta42, hashable, offset - 1, 1)
547                                                 && !hasOneOf(meta43, hashable, offset - 1, 3)) {
548                                         buffer.append('K');
549                                         offset += 2;
550                                         break;
551                                 }
552                                 if (hasOneOf(meta44, hashable, offset + 1, 1)
553                                                 || hasOneOf(meta45, hashable, offset - 1, 4)) {
554                                         if (hasOneOf(meta46, hashable, 0, 4)
555                                                         || hasOneOf(meta47, hashable, 0, 3)
556                                                         || hasOneOf(meta48, hashable, offset + 1, 2)) {
557                                                 buffer.append('K');
558                                         } else {
559                                                 buffer.append('J');
560                                         }
561                                         offset += 2;
562                                         break;
563                                 }
564                                 if (hashable[offset + 1] == 'G')
565                                         offset += 2;
566                                 else
567                                         offset += 1;
568                                 buffer.append('K');
569                                 break;
570                         case 'H':
571                                 if (((offset == 0) || hasVowel(hashable, offset - 1,
572                                                 hashable.length))
573                                                 && hasVowel(hashable, offset + 1, hashable.length)) {
574                                         buffer.append('H');
575                                         offset += 2;
576                                 } else {
577                                         offset += 1;
578                                 }
579                                 break;
580                         case 'J':
581                                 if (hasOneOf(meta50, hashable, offset, 4)
582                                                 || hasOneOf(meta51, hashable, 0, 4)) {
583                                         if ((offset == 0) && (hashable[offset + 4] == ' ')
584                                                         || hasOneOf(meta52, hashable, 0, 4)) {
585                                                 buffer.append('H');
586                                         } else {
587                                                 buffer.append('J');
588                                         }
589                                         offset += 1;
590                                         break;
591                                 }
592                                 if ((offset == 0) && !hasOneOf(meta53, hashable, offset, 4)) {
593                                         buffer.append('J');
594                                 } else {
595                                         if (hasVowel(hashable, offset - 1, hashable.length)
596                                                         && !has95
597                                                         && ((hashable[offset + 1] == 'A') || hashable[offset + 1] == 'O')) {
598                                                 buffer.append('J');
599                                         } else {
600                                                 if (offset == (hashable.length - 1)) {
601                                                         buffer.append('J');
602                                                 } else {
603                                                         if (!hasOneOf(meta54, hashable, offset + 1, 1)
604                                                                         && !hasOneOf(meta55, hashable, offset - 1,
605                                                                                         1)) {
606                                                                 buffer.append('J');
607                                                         }
608                                                 }
609                                         }
610                                 }
611                                 if (hashable[offset + 1] == 'J')
612                                         offset += 2;
613                                 else
614                                         offset += 1;
615                                 break;
616                         case 'K':
617                                 if (hashable[offset + 1] == 'K')
618                                         offset += 2;
619                                 else
620                                         offset += 1;
621                                 buffer.append('K');
622                                 break;
623                         case 'L':
624                                 if (hashable[offset + 1] == 'L') {
625                                         if (((offset == (hashable.length - 3)) && hasOneOf(meta56,
626                                                         hashable, offset - 1, 4))
627                                                         || ((hasOneOf(meta57, hashable,
628                                                                         (hashable.length - 1) - 1, 2) || hasOneOf(
629                                                                         meta58, hashable, hashable.length - 1, 1)) && hasOneOf(
630                                                                         meta59, hashable, offset - 1, 4))) {
631                                                 buffer.append('L');
632                                                 offset += 2;
633                                                 break;
634                                         }
635                                         offset += 2;
636                                 } else
637                                         offset += 1;
638                                 buffer.append('L');
639                                 break;
640                         case 'M':
641                                 if ((hasOneOf(meta60, hashable, offset - 1, 3) && (((offset + 1) == (hashable.length - 1)) || hasOneOf(
642                                                 meta61, hashable, offset + 2, 2)))
643                                                 || (hashable[offset + 1] == 'M'))
644                                         offset += 2;
645                                 else
646                                         offset += 1;
647                                 buffer.append('M');
648                                 break;
649                         case 'N':
650                                 if (hashable[offset + 1] == 'N')
651                                         offset += 2;
652                                 else
653                                         offset += 1;
654                                 buffer.append('N');
655                                 break;
656                         case '\u00D1':
657                                 offset += 1;
658                                 buffer.append('N');
659                                 break;
660                         case 'P':
661                                 if (hashable[offset + 1] == 'N') {
662                                         buffer.append('F');
663                                         offset += 2;
664                                         break;
665                                 }
666                                 if (hasOneOf(meta62, hashable, offset + 1, 1))
667                                         offset += 2;
668                                 else
669                                         offset += 1;
670                                 buffer.append('P');
671                                 break;
672                         case 'Q':
673                                 if (hashable[offset + 1] == 'Q')
674                                         offset += 2;
675                                 else
676                                         offset += 1;
677                                 buffer.append('K');
678                                 break;
679                         case 'R':
680                                 if (!((offset == (hashable.length - 1)) && !has95
681                                                 && hasOneOf(meta63, hashable, offset - 2, 2) && !hasOneOf(
682                                                 meta64, hashable, offset - 4, 2)))
683                                         buffer.append('R');
684                                 if (hashable[offset + 1] == 'R')
685                                         offset += 2;
686                                 else
687                                         offset += 1;
688                                 break;
689                         case 'S':
690                                 if (hasOneOf(meta65, hashable, offset - 1, 3)) {
691                                         offset += 1;
692                                         break;
693                                 }
694                                 if ((offset == 0) && hasOneOf(meta66, hashable, offset, 5)) {
695                                         buffer.append('X');
696                                         offset += 1;
697                                         break;
698                                 }
699                                 if (hasOneOf(meta67, hashable, offset, 2)) {
700                                         if (hasOneOf(meta68, hashable, offset + 1, 4))
701                                                 buffer.append('S');
702                                         else
703                                                 buffer.append('X');
704                                         offset += 2;
705                                         break;
706                                 }
707                                 if (hasOneOf(meta69, hashable, offset, 3)
708                                                 || hasOneOf(meta70, hashable, offset, 4)) {
709                                         buffer.append('S');
710                                         offset += 3;
711                                         break;
712                                 }
713                                 if (((offset == 0) && hasOneOf(meta71, hashable, offset + 1, 1))
714                                                 || hasOneOf(meta72, hashable, offset + 1, 1)) {
715                                         buffer.append('S');
716                                         if (hasOneOf(meta73, hashable, offset + 1, 1))
717                                                 offset += 2;
718                                         else
719                                                 offset += 1;
720                                         break;
721                                 }
722                                 if (hasOneOf(meta74, hashable, offset, 2)) {
723                                         if (hashable[offset + 2] == 'H')
724                                                 if (hasOneOf(meta75, hashable, offset + 3, 2)) {
725                                                         if (hasOneOf(meta76, hashable, offset + 3, 2)) {
726                                                                 buffer.append("X"); //$NON-NLS-1$
727                                                         } else {
728                                                                 buffer.append("SK"); //$NON-NLS-1$
729                                                         }
730                                                         offset += 3;
731                                                         break;
732                                                 } else {
733                                                         buffer.append('X');
734                                                         offset += 3;
735                                                         break;
736                                                 }
737                                         if (hasOneOf(meta77, hashable, offset + 2, 1)) {
738                                                 buffer.append('S');
739                                                 offset += 3;
740                                                 break;
741                                         }
742                                         buffer.append("SK"); //$NON-NLS-1$
743                                         offset += 3;
744                                         break;
745                                 }
746                                 if (!((offset == (hashable.length - 1)) && hasOneOf(meta78,
747                                                 hashable, offset - 2, 2)))
748                                         buffer.append('S');
749                                 if (hasOneOf(meta79, hashable, offset + 1, 1))
750                                         offset += 2;
751                                 else
752                                         offset += 1;
753                                 break;
754                         case 'T':
755                                 if (hasOneOf(meta80, hashable, offset, 4)) {
756                                         buffer.append('X');
757                                         offset += 3;
758                                         break;
759                                 }
760                                 if (hasOneOf(meta81, hashable, offset, 3)) {
761                                         buffer.append('X');
762                                         offset += 3;
763                                         break;
764                                 }
765                                 if (hasOneOf(meta82, hashable, offset, 2)
766                                                 || hasOneOf(meta83, hashable, offset, 3)) {
767                                         if (hasOneOf(meta84, hashable, (offset + 2), 2)
768                                                         || hasOneOf(meta85, hashable, 0, 4)
769                                                         || hasOneOf(meta86, hashable, 0, 3)) {
770                                                 buffer.append('T');
771                                         } else {
772                                                 buffer.append('0');
773                                         }
774                                         offset += 2;
775                                         break;
776                                 }
777                                 if (hasOneOf(meta87, hashable, offset + 1, 1)) {
778                                         offset += 2;
779                                 } else
780                                         offset += 1;
781                                 buffer.append('T');
782                                 break;
783                         case 'V':
784                                 if (hashable[offset + 1] == 'V')
785                                         offset += 2;
786                                 else
787                                         offset += 1;
788                                 buffer.append('F');
789                                 break;
790                         case 'W':
791                                 if (hasOneOf(meta88, hashable, offset, 2)) {
792                                         buffer.append('R');
793                                         offset += 2;
794                                         break;
795                                 }
796                                 if ((offset == 0)
797                                                 && (hasVowel(hashable, offset + 1, hashable.length) || hasOneOf(
798                                                                 meta89, hashable, offset, 2))) {
799                                         buffer.append('A');
800                                 }
801                                 if (((offset == (hashable.length - 1)) && hasVowel(hashable,
802                                                 offset - 1, hashable.length))
803                                                 || hasOneOf(meta90, hashable, offset - 1, 5)
804                                                 || hasOneOf(meta91, hashable, 0, 3)) {
805                                         buffer.append('F');
806                                         offset += 1;
807                                         break;
808                                 }
809                                 if (hasOneOf(meta92, hashable, offset, 4)) {
810                                         buffer.append("TS"); //$NON-NLS-1$
811                                         offset += 4;
812                                         break;
813                                 }
814                                 offset += 1;
815                                 break;
816                         case 'X':
817                                 if (!((offset == (hashable.length - 1)) && (hasOneOf(meta93,
818                                                 hashable, offset - 3, 3) || hasOneOf(meta94, hashable,
819                                                 offset - 2, 2))))
820                                         buffer.append("KS"); //$NON-NLS-1$
821                                 if (hasOneOf(meta49, hashable, offset + 1, 1))
822                                         offset += 2;
823                                 else
824                                         offset += 1;
825                                 break;
826                         case 'Z':
827                                 if (hashable[offset + 1] == 'H') {
828                                         buffer.append('J');
829                                         offset += 2;
830                                         break;
831                                 } else {
832                                         buffer.append('S');
833                                 }
834                                 if (hashable[offset + 1] == 'Z')
835                                         offset += 2;
836                                 else
837                                         offset += 1;
838                                 break;
839                         default:
840                                 offset += 1;
841                         }
842                 }
843                 return buffer.toString();
844         }
845
846         /*
847          * @see net.sourceforge.phpdt.internal.ui.text.spelling.engine.IPhoneticHashProvider#getMutators()
848          */
849         public final char[] getMutators() {
850                 return MUTATOR_CHARACTERS;
851         }
852 }