1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
16 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
19 //import org.eclipse.jface.text.Assert;
20 import org.eclipse.core.runtime.Assert;
23 * A java break iterator. It returns all breaks, including before and after
24 * whitespace, and it returns all camelcase breaks.
26 * A line break may be any of "\n", "\r", "\r\n", "\n\r".
31 public class JavaBreakIterator extends BreakIterator {
34 * A run of common characters.
36 protected static abstract class Run {
37 /** The length of this run. */
45 * Returns <code>true</code> if this run consumes <code>ch</code>,
46 * <code>false</code> otherwise. If <code>true</code> is returned,
47 * the length of the receiver is adjusted accordingly.
50 * the character to test
51 * @return <code>true</code> if <code>ch</code> was consumed
53 protected boolean consume(char ch) {
62 * Whether this run accepts that character; does not update state.
63 * Called from the default implementation of <code>consume</code>.
66 * the character to test
67 * @return <code>true</code> if <code>ch</code> is accepted
69 protected abstract boolean isValid(char ch);
72 * Resets this run to the initial state.
74 protected void init() {
79 static final class Whitespace extends Run {
80 protected boolean isValid(char ch) {
81 return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
85 static final class LineDelimiter extends Run {
86 /** State: INIT -> delimiter -> EXIT. */
89 private static final char INIT = '\0';
91 private static final char EXIT = '\1';
94 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
96 protected void init() {
102 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
104 protected boolean consume(char ch) {
105 if (!isValid(ch) || fState == EXIT)
108 if (fState == INIT) {
112 } else if (fState != ch) {
121 protected boolean isValid(char ch) {
122 return ch == '\n' || ch == '\r';
126 static final class Identifier extends Run {
128 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
130 protected boolean isValid(char ch) {
131 return Scanner.isPHPIdentifierPart(ch);
135 static final class CamelCaseIdentifier extends Run {
137 private static final int S_INIT = 0;
139 private static final int S_LOWER = 1;
141 private static final int S_ONE_CAP = 2;
143 private static final int S_ALL_CAPS = 3;
145 private static final int S_EXIT = 4;
147 private static final int S_EXIT_MINUS_ONE = 5;
149 /* character types */
150 private static final int K_INVALID = 0;
152 private static final int K_LOWER = 1;
154 private static final int K_UPPER = 2;
156 private static final int K_OTHER = 3;
160 private final static int[][] MATRIX = new int[][] {
161 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
162 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
163 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
164 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
165 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
169 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
171 protected void init() {
177 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
179 protected boolean consume(char ch) {
180 int kind = getKind(ch);
181 fState = MATRIX[fState][kind];
190 case S_EXIT_MINUS_ONE:
194 Assert.isTrue(false);
200 * Determines the kind of a character.
203 * the character to test
205 private int getKind(char ch) {
206 if (Character.isUpperCase(ch))
208 if (Character.isLowerCase(ch))
210 if (Scanner.isPHPIdentifierPart(ch)) // _, digits...
216 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
218 protected boolean isValid(char ch) {
219 return Scanner.isPHPIdentifierPart(ch);
223 static final class Other extends Run {
225 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
227 protected boolean isValid(char ch) {
228 return !Character.isWhitespace(ch)
229 && !Scanner.isPHPIdentifierPart(ch);
233 private static final Run WHITESPACE = new Whitespace();
235 private static final Run DELIMITER = new LineDelimiter();
237 private static final Run CAMELCASE = new CamelCaseIdentifier(); // new
240 private static final Run OTHER = new Other();
242 /** The platform break iterator (word instance) used as a base. */
243 protected final BreakIterator fIterator;
245 /** The text we operate on. */
246 protected CharSequence fText;
248 /** our current position for the stateful methods. */
252 * Creates a new break iterator.
254 public JavaBreakIterator() {
255 fIterator = BreakIterator.getWordInstance();
256 fIndex = fIterator.current();
260 * @see java.text.BreakIterator#current()
262 public int current() {
267 * @see java.text.BreakIterator#first()
270 fIndex = fIterator.first();
275 * @see java.text.BreakIterator#following(int)
277 public int following(int offset) {
278 // work around overly eager IllegalArgumentExceptions in the standard implementation
279 if (offset == getText().getEndIndex())
282 int next = fIterator.following(offset);
286 // TODO deal with complex script word boundaries
287 // Math.min(offset + run.length, next) does not work
288 // since wordinstance considers _ as boundaries
289 // seems to work fine, however
290 Run run = consumeRun(offset);
291 return offset + run.length;
296 * Consumes a run of characters at the limits of which we introduce a break.
299 * the offset to start at
300 * @return the run that was consumed
302 private Run consumeRun(int offset) {
303 // assert offset < length
305 char ch = fText.charAt(offset);
306 int length = fText.length();
307 Run run = getRun(ch);
308 while (run.consume(ch) && offset < length - 1) {
310 ch = fText.charAt(offset);
317 * Returns a run based on a character.
320 * the character to test
321 * @return the correct run given <code>ch</code>
323 private Run getRun(char ch) {
325 if (WHITESPACE.isValid(ch))
327 else if (DELIMITER.isValid(ch))
329 else if (CAMELCASE.isValid(ch))
331 else if (OTHER.isValid(ch))
334 Assert.isTrue(false);
343 * @see java.text.BreakIterator#getText()
345 public CharacterIterator getText() {
346 return fIterator.getText();
350 * @see java.text.BreakIterator#isBoundary(int)
352 public boolean isBoundary(int offset) {
353 if (offset == getText().getBeginIndex())
356 return following(offset - 1) == offset;
360 * @see java.text.BreakIterator#last()
363 fIndex = fIterator.last();
368 * @see java.text.BreakIterator#next()
371 fIndex = following(fIndex);
376 * @see java.text.BreakIterator#next(int)
378 public int next(int n) {
379 return fIterator.next(n);
383 * @see java.text.BreakIterator#preceding(int)
385 public int preceding(int offset) {
386 if (offset == getText().getBeginIndex())
389 if (isBoundary(offset - 1))
392 int previous = offset - 1;
394 previous = fIterator.preceding(previous);
395 } while (!isBoundary(previous));
398 while (previous < offset) {
400 previous = following(previous);
407 * @see java.text.BreakIterator#previous()
409 public int previous() {
410 fIndex = preceding(fIndex);
415 * @see java.text.BreakIterator#setText(java.lang.String)
417 public void setText(String newText) {
418 setText((CharSequence) newText);
422 * Sets the text to iterate over, given as a char sequence.
427 public void setText(CharSequence newText) {
429 fIterator.setText(new SequenceCharacterIterator(newText));
434 * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
436 public void setText(CharacterIterator newText) {
437 if (newText instanceof CharSequence) {
438 fText = (CharSequence) newText;
439 fIterator.setText(newText);
442 throw new UnsupportedOperationException(
443 "CharacterIterator not supported"); //$NON-NLS-1$