bc83a88785657d02d283c34da5a8d3f89f78df74
[phpeclipse.git] / net.sourceforge.phpeclipse.ui / src / net / sourceforge / phpdt / internal / ui / text / JavaBreakIterator.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2004 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v10.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
12
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
15
16 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
17
18 //incastrix
19 //import org.eclipse.jface.text.Assert;
20 import org.eclipse.core.runtime.Assert;
21
22 /**
23  * A java break iterator. It returns all breaks, including before and after
24  * whitespace, and it returns all camelcase breaks.
25  * <p>
26  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
27  * </p>
28  * 
29  * @since 3.0
30  */
31 public class JavaBreakIterator extends BreakIterator {
32
33         /**
34          * A run of common characters.
35          */
36         protected static abstract class Run {
37                 /** The length of this run. */
38                 protected int length;
39
40                 public Run() {
41                         init();
42                 }
43
44                 /**
45                  * Returns <code>true</code> if this run consumes <code>ch</code>,
46                  * <code>false</code> otherwise. If <code>true</code> is returned,
47                  * the length of the receiver is adjusted accordingly.
48                  * 
49                  * @param ch
50                  *            the character to test
51                  * @return <code>true</code> if <code>ch</code> was consumed
52                  */
53                 protected boolean consume(char ch) {
54                         if (isValid(ch)) {
55                                 length++;
56                                 return true;
57                         }
58                         return false;
59                 }
60
61                 /**
62                  * Whether this run accepts that character; does not update state.
63                  * Called from the default implementation of <code>consume</code>.
64                  * 
65                  * @param ch
66                  *            the character to test
67                  * @return <code>true</code> if <code>ch</code> is accepted
68                  */
69                 protected abstract boolean isValid(char ch);
70
71                 /**
72                  * Resets this run to the initial state.
73                  */
74                 protected void init() {
75                         length = 0;
76                 }
77         }
78
79         static final class Whitespace extends Run {
80                 protected boolean isValid(char ch) {
81                         return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
82                 }
83         }
84
85         static final class LineDelimiter extends Run {
86                 /** State: INIT -> delimiter -> EXIT. */
87                 private char fState;
88
89                 private static final char INIT = '\0';
90
91                 private static final char EXIT = '\1';
92
93                 /*
94                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
95                  */
96                 protected void init() {
97                         super.init();
98                         fState = INIT;
99                 }
100
101                 /*
102                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
103                  */
104                 protected boolean consume(char ch) {
105                         if (!isValid(ch) || fState == EXIT)
106                                 return false;
107
108                         if (fState == INIT) {
109                                 fState = ch;
110                                 length++;
111                                 return true;
112                         } else if (fState != ch) {
113                                 fState = EXIT;
114                                 length++;
115                                 return true;
116                         } else {
117                                 return false;
118                         }
119                 }
120
121                 protected boolean isValid(char ch) {
122                         return ch == '\n' || ch == '\r';
123                 }
124         }
125
126         static final class Identifier extends Run {
127                 /*
128                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
129                  */
130                 protected boolean isValid(char ch) {
131                         return Scanner.isPHPIdentifierPart(ch);
132                 }
133         }
134
135         static final class CamelCaseIdentifier extends Run {
136                 /* states */
137                 private static final int S_INIT = 0;
138
139                 private static final int S_LOWER = 1;
140
141                 private static final int S_ONE_CAP = 2;
142
143                 private static final int S_ALL_CAPS = 3;
144
145                 private static final int S_EXIT = 4;
146
147                 private static final int S_EXIT_MINUS_ONE = 5;
148
149                 /* character types */
150                 private static final int K_INVALID = 0;
151
152                 private static final int K_LOWER = 1;
153
154                 private static final int K_UPPER = 2;
155
156                 private static final int K_OTHER = 3;
157
158                 private int fState;
159
160                 private final static int[][] MATRIX = new int[][] {
161                 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
162                                 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
163                                 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
164                                 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
165                                 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
166                 };
167
168                 /*
169                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
170                  */
171                 protected void init() {
172                         super.init();
173                         fState = S_INIT;
174                 }
175
176                 /*
177                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consumes(char)
178                  */
179                 protected boolean consume(char ch) {
180                         int kind = getKind(ch);
181                         fState = MATRIX[fState][kind];
182                         switch (fState) {
183                         case S_LOWER:
184                         case S_ONE_CAP:
185                         case S_ALL_CAPS:
186                                 length++;
187                                 return true;
188                         case S_EXIT:
189                                 return false;
190                         case S_EXIT_MINUS_ONE:
191                                 length--;
192                                 return false;
193                         default:
194                                 Assert.isTrue(false);
195                                 return false;
196                         }
197                 }
198
199                 /**
200                  * Determines the kind of a character.
201                  * 
202                  * @param ch
203                  *            the character to test
204                  */
205                 private int getKind(char ch) {
206                         if (Character.isUpperCase(ch))
207                                 return K_UPPER;
208                         if (Character.isLowerCase(ch))
209                                 return K_LOWER;
210                         if (Scanner.isPHPIdentifierPart(ch)) // _, digits...
211                                 return K_OTHER;
212                         return K_INVALID;
213                 }
214
215                 /*
216                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
217                  */
218                 protected boolean isValid(char ch) {
219                         return Scanner.isPHPIdentifierPart(ch);
220                 }
221         }
222
223         static final class Other extends Run {
224                 /*
225                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
226                  */
227                 protected boolean isValid(char ch) {
228                         return !Character.isWhitespace(ch)
229                                         && !Scanner.isPHPIdentifierPart(ch);
230                 }
231         }
232
233         private static final Run WHITESPACE = new Whitespace();
234
235         private static final Run DELIMITER = new LineDelimiter();
236
237         private static final Run CAMELCASE = new CamelCaseIdentifier(); // new
238                                                                                                                                         // Identifier();
239
240         private static final Run OTHER = new Other();
241
242         /** The platform break iterator (word instance) used as a base. */
243         protected final BreakIterator fIterator;
244
245         /** The text we operate on. */
246         protected CharSequence fText;
247
248         /** our current position for the stateful methods. */
249         private int fIndex;
250
251         /**
252          * Creates a new break iterator.
253          */
254         public JavaBreakIterator() {
255                 fIterator = BreakIterator.getWordInstance();
256                 fIndex = fIterator.current();
257         }
258
259         /*
260          * @see java.text.BreakIterator#current()
261          */
262         public int current() {
263                 return fIndex;
264         }
265
266         /*
267          * @see java.text.BreakIterator#first()
268          */
269         public int first() {
270                 fIndex = fIterator.first();
271                 return fIndex;
272         }
273
274         /*
275          * @see java.text.BreakIterator#following(int)
276          */
277         public int following(int offset) {
278                 // work around too eager IAEs in standard impl
279                 if (offset == getText().getEndIndex())
280                         return DONE;
281
282                 int next = fIterator.following(offset);
283                 if (next == DONE)
284                         return DONE;
285
286                 // TODO deal with complex script word boundaries
287                 // Math.min(offset + run.length, next) does not work
288                 // since wordinstance considers _ as boundaries
289                 // seems to work fine, however
290                 Run run = consumeRun(offset);
291                 return offset + run.length;
292
293         }
294
295         /**
296          * Consumes a run of characters at the limits of which we introduce a break.
297          * 
298          * @param offset
299          *            the offset to start at
300          * @return the run that was consumed
301          */
302         private Run consumeRun(int offset) {
303                 // assert offset < length
304
305                 char ch = fText.charAt(offset);
306                 int length = fText.length();
307                 Run run = getRun(ch);
308                 while (run.consume(ch) && offset < length - 1) {
309                         offset++;
310                         ch = fText.charAt(offset);
311                 }
312
313                 return run;
314         }
315
316         /**
317          * Retunrs a run based on a character.
318          * 
319          * @param ch
320          *            the character to test
321          * @return the correct character given <code>ch</code>
322          */
323         private Run getRun(char ch) {
324                 Run run;
325                 if (WHITESPACE.isValid(ch))
326                         run = WHITESPACE;
327                 else if (DELIMITER.isValid(ch))
328                         run = DELIMITER;
329                 else if (CAMELCASE.isValid(ch))
330                         run = CAMELCASE;
331                 else if (OTHER.isValid(ch))
332                         run = OTHER;
333                 else {
334                         Assert.isTrue(false);
335                         return null;
336                 }
337
338                 run.init();
339                 return run;
340         }
341
342         /*
343          * @see java.text.BreakIterator#getText()
344          */
345         public CharacterIterator getText() {
346                 return fIterator.getText();
347         }
348
349         /*
350          * @see java.text.BreakIterator#isBoundary(int)
351          */
352         public boolean isBoundary(int offset) {
353                 if (offset == getText().getBeginIndex())
354                         return true;
355                 else
356                         return following(offset - 1) == offset;
357         }
358
359         /*
360          * @see java.text.BreakIterator#last()
361          */
362         public int last() {
363                 fIndex = fIterator.last();
364                 return fIndex;
365         }
366
367         /*
368          * @see java.text.BreakIterator#next()
369          */
370         public int next() {
371                 fIndex = following(fIndex);
372                 return fIndex;
373         }
374
375         /*
376          * @see java.text.BreakIterator#next(int)
377          */
378         public int next(int n) {
379                 return fIterator.next(n);
380         }
381
382         /*
383          * @see java.text.BreakIterator#preceding(int)
384          */
385         public int preceding(int offset) {
386                 if (offset == getText().getBeginIndex())
387                         return DONE;
388
389                 if (isBoundary(offset - 1))
390                         return offset - 1;
391
392                 int previous = offset - 1;
393                 do {
394                         previous = fIterator.preceding(previous);
395                 } while (!isBoundary(previous));
396
397                 int last = DONE;
398                 while (previous < offset) {
399                         last = previous;
400                         previous = following(previous);
401                 }
402
403                 return last;
404         }
405
406         /*
407          * @see java.text.BreakIterator#previous()
408          */
409         public int previous() {
410                 fIndex = preceding(fIndex);
411                 return fIndex;
412         }
413
414         /*
415          * @see java.text.BreakIterator#setText(java.lang.String)
416          */
417         public void setText(String newText) {
418                 setText((CharSequence) newText);
419         }
420
421         /**
422          * Creates a break iterator given a char sequence.
423          * 
424          * @param newText
425          *            the new text
426          */
427         public void setText(CharSequence newText) {
428                 fText = newText;
429                 fIterator.setText(new SequenceCharacterIterator(newText));
430                 first();
431         }
432
433         /*
434          * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
435          */
436         public void setText(CharacterIterator newText) {
437                 if (newText instanceof CharSequence) {
438                         fText = (CharSequence) newText;
439                         fIterator.setText(newText);
440                         first();
441                 } else {
442                         throw new UnsupportedOperationException(
443                                         "CharacterIterator not supported"); //$NON-NLS-1$
444                 }
445         }
446 }