f80d88a4dd5a9b5b8974bb3739793c66acc78b1e
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / ui / text / JavaBreakIterator.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
15
16 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
17
18 import org.eclipse.jface.text.Assert;
19
20 /**
21  * A java break iterator. It returns all breaks, including before and after
22  * whitespace, and it returns all camelcase breaks.
23  * <p>
24  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
25  * </p>
26  * 
27  * @since 3.0
28  */
29 public class JavaBreakIterator extends BreakIterator {
30
31         /**
32          * A run of common characters.
33          */
34         protected static abstract class Run {
35                 /** The length of this run. */
36                 protected int length;
37
38                 public Run() {
39                         init();
40                 }
41
42                 /**
43                  * Returns <code>true</code> if this run consumes <code>ch</code>,
44                  * <code>false</code> otherwise. If <code>true</code> is returned,
45                  * the length of the receiver is adjusted accordingly.
46                  * 
47                  * @param ch
48                  *            the character to test
49                  * @return <code>true</code> if <code>ch</code> was consumed
50                  */
51                 protected boolean consume(char ch) {
52                         if (isValid(ch)) {
53                                 length++;
54                                 return true;
55                         }
56                         return false;
57                 }
58
59                 /**
60                  * Whether this run accepts that character; does not update state.
61                  * Called from the default implementation of <code>consume</code>.
62                  * 
63                  * @param ch
64                  *            the character to test
65                  * @return <code>true</code> if <code>ch</code> is accepted
66                  */
67                 protected abstract boolean isValid(char ch);
68
69                 /**
70                  * Resets this run to the initial state.
71                  */
72                 protected void init() {
73                         length = 0;
74                 }
75         }
76
77         static final class Whitespace extends Run {
78                 protected boolean isValid(char ch) {
79                         return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
80                 }
81         }
82
83         static final class LineDelimiter extends Run {
84                 /** State: INIT -> delimiter -> EXIT. */
85                 private char fState;
86
87                 private static final char INIT = '\0';
88
89                 private static final char EXIT = '\1';
90
91                 /*
92                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
93                  */
94                 protected void init() {
95                         super.init();
96                         fState = INIT;
97                 }
98
99                 /*
100                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
101                  */
102                 protected boolean consume(char ch) {
103                         if (!isValid(ch) || fState == EXIT)
104                                 return false;
105
106                         if (fState == INIT) {
107                                 fState = ch;
108                                 length++;
109                                 return true;
110                         } else if (fState != ch) {
111                                 fState = EXIT;
112                                 length++;
113                                 return true;
114                         } else {
115                                 return false;
116                         }
117                 }
118
119                 protected boolean isValid(char ch) {
120                         return ch == '\n' || ch == '\r';
121                 }
122         }
123
124         static final class Identifier extends Run {
125                 /*
126                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
127                  */
128                 protected boolean isValid(char ch) {
129                         return Scanner.isPHPIdentifierPart(ch);
130                 }
131         }
132
133         static final class CamelCaseIdentifier extends Run {
134                 /* states */
135                 private static final int S_INIT = 0;
136
137                 private static final int S_LOWER = 1;
138
139                 private static final int S_ONE_CAP = 2;
140
141                 private static final int S_ALL_CAPS = 3;
142
143                 private static final int S_EXIT = 4;
144
145                 private static final int S_EXIT_MINUS_ONE = 5;
146
147                 /* character types */
148                 private static final int K_INVALID = 0;
149
150                 private static final int K_LOWER = 1;
151
152                 private static final int K_UPPER = 2;
153
154                 private static final int K_OTHER = 3;
155
156                 private int fState;
157
158                 private final static int[][] MATRIX = new int[][] {
159                 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
160                                 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
161                                 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
162                                 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
163                                 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
164                 };
165
166                 /*
167                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
168                  */
169                 protected void init() {
170                         super.init();
171                         fState = S_INIT;
172                 }
173
174                 /*
175                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consumes(char)
176                  */
177                 protected boolean consume(char ch) {
178                         int kind = getKind(ch);
179                         fState = MATRIX[fState][kind];
180                         switch (fState) {
181                         case S_LOWER:
182                         case S_ONE_CAP:
183                         case S_ALL_CAPS:
184                                 length++;
185                                 return true;
186                         case S_EXIT:
187                                 return false;
188                         case S_EXIT_MINUS_ONE:
189                                 length--;
190                                 return false;
191                         default:
192                                 Assert.isTrue(false);
193                                 return false;
194                         }
195                 }
196
197                 /**
198                  * Determines the kind of a character.
199                  * 
200                  * @param ch
201                  *            the character to test
202                  */
203                 private int getKind(char ch) {
204                         if (Character.isUpperCase(ch))
205                                 return K_UPPER;
206                         if (Character.isLowerCase(ch))
207                                 return K_LOWER;
208                         if (Scanner.isPHPIdentifierPart(ch)) // _, digits...
209                                 return K_OTHER;
210                         return K_INVALID;
211                 }
212
213                 /*
214                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
215                  */
216                 protected boolean isValid(char ch) {
217                         return Scanner.isPHPIdentifierPart(ch);
218                 }
219         }
220
221         static final class Other extends Run {
222                 /*
223                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
224                  */
225                 protected boolean isValid(char ch) {
226                         return !Character.isWhitespace(ch)
227                                         && !Scanner.isPHPIdentifierPart(ch);
228                 }
229         }
230
231         private static final Run WHITESPACE = new Whitespace();
232
233         private static final Run DELIMITER = new LineDelimiter();
234
235         private static final Run CAMELCASE = new CamelCaseIdentifier(); // new
236                                                                                                                                         // Identifier();
237
238         private static final Run OTHER = new Other();
239
240         /** The platform break iterator (word instance) used as a base. */
241         protected final BreakIterator fIterator;
242
243         /** The text we operate on. */
244         protected CharSequence fText;
245
246         /** our current position for the stateful methods. */
247         private int fIndex;
248
249         /**
250          * Creates a new break iterator.
251          */
252         public JavaBreakIterator() {
253                 fIterator = BreakIterator.getWordInstance();
254                 fIndex = fIterator.current();
255         }
256
257         /*
258          * @see java.text.BreakIterator#current()
259          */
260         public int current() {
261                 return fIndex;
262         }
263
264         /*
265          * @see java.text.BreakIterator#first()
266          */
267         public int first() {
268                 fIndex = fIterator.first();
269                 return fIndex;
270         }
271
272         /*
273          * @see java.text.BreakIterator#following(int)
274          */
275         public int following(int offset) {
276                 // work around too eager IAEs in standard impl
277                 if (offset == getText().getEndIndex())
278                         return DONE;
279
280                 int next = fIterator.following(offset);
281                 if (next == DONE)
282                         return DONE;
283
284                 // TODO deal with complex script word boundaries
285                 // Math.min(offset + run.length, next) does not work
286                 // since wordinstance considers _ as boundaries
287                 // seems to work fine, however
288                 Run run = consumeRun(offset);
289                 return offset + run.length;
290
291         }
292
293         /**
294          * Consumes a run of characters at the limits of which we introduce a break.
295          * 
296          * @param offset
297          *            the offset to start at
298          * @return the run that was consumed
299          */
300         private Run consumeRun(int offset) {
301                 // assert offset < length
302
303                 char ch = fText.charAt(offset);
304                 int length = fText.length();
305                 Run run = getRun(ch);
306                 while (run.consume(ch) && offset < length - 1) {
307                         offset++;
308                         ch = fText.charAt(offset);
309                 }
310
311                 return run;
312         }
313
314         /**
315          * Retunrs a run based on a character.
316          * 
317          * @param ch
318          *            the character to test
319          * @return the correct character given <code>ch</code>
320          */
321         private Run getRun(char ch) {
322                 Run run;
323                 if (WHITESPACE.isValid(ch))
324                         run = WHITESPACE;
325                 else if (DELIMITER.isValid(ch))
326                         run = DELIMITER;
327                 else if (CAMELCASE.isValid(ch))
328                         run = CAMELCASE;
329                 else if (OTHER.isValid(ch))
330                         run = OTHER;
331                 else {
332                         Assert.isTrue(false);
333                         return null;
334                 }
335
336                 run.init();
337                 return run;
338         }
339
340         /*
341          * @see java.text.BreakIterator#getText()
342          */
343         public CharacterIterator getText() {
344                 return fIterator.getText();
345         }
346
347         /*
348          * @see java.text.BreakIterator#isBoundary(int)
349          */
350         public boolean isBoundary(int offset) {
351                 if (offset == getText().getBeginIndex())
352                         return true;
353                 else
354                         return following(offset - 1) == offset;
355         }
356
357         /*
358          * @see java.text.BreakIterator#last()
359          */
360         public int last() {
361                 fIndex = fIterator.last();
362                 return fIndex;
363         }
364
365         /*
366          * @see java.text.BreakIterator#next()
367          */
368         public int next() {
369                 fIndex = following(fIndex);
370                 return fIndex;
371         }
372
373         /*
374          * @see java.text.BreakIterator#next(int)
375          */
376         public int next(int n) {
377                 return fIterator.next(n);
378         }
379
380         /*
381          * @see java.text.BreakIterator#preceding(int)
382          */
383         public int preceding(int offset) {
384                 if (offset == getText().getBeginIndex())
385                         return DONE;
386
387                 if (isBoundary(offset - 1))
388                         return offset - 1;
389
390                 int previous = offset - 1;
391                 do {
392                         previous = fIterator.preceding(previous);
393                 } while (!isBoundary(previous));
394
395                 int last = DONE;
396                 while (previous < offset) {
397                         last = previous;
398                         previous = following(previous);
399                 }
400
401                 return last;
402         }
403
404         /*
405          * @see java.text.BreakIterator#previous()
406          */
407         public int previous() {
408                 fIndex = preceding(fIndex);
409                 return fIndex;
410         }
411
412         /*
413          * @see java.text.BreakIterator#setText(java.lang.String)
414          */
415         public void setText(String newText) {
416                 setText((CharSequence) newText);
417         }
418
419         /**
420          * Creates a break iterator given a char sequence.
421          * 
422          * @param newText
423          *            the new text
424          */
425         public void setText(CharSequence newText) {
426                 fText = newText;
427                 fIterator.setText(new SequenceCharacterIterator(newText));
428                 first();
429         }
430
431         /*
432          * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
433          */
434         public void setText(CharacterIterator newText) {
435                 if (newText instanceof CharSequence) {
436                         fText = (CharSequence) newText;
437                         fIterator.setText(newText);
438                         first();
439                 } else {
440                         throw new UnsupportedOperationException(
441                                         "CharacterIterator not supported"); //$NON-NLS-1$
442                 }
443         }
444 }