1 /*******************************************************************************
2 * Copyright (c) 2000, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v10.html
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11 package net.sourceforge.phpdt.internal.ui.text;
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
16 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
18 import org.eclipse.jface.text.Assert;
22 * A Java break iterator. It returns all breaks, including before and after
23 * whitespace, and it returns all camel-case breaks.
25 * A line break may be any of "\n", "\r", "\r\n", "\n\r".
30 public class JavaBreakIterator extends BreakIterator {
33 * A run of common characters.
35 protected static abstract class Run {
36 /** The length of this run. */
44 * Returns <code>true</code> if this run consumes <code>ch</code>,
45 * <code>false</code> otherwise. If <code>true</code> is returned,
46 * the length of the receiver is adjusted accordingly.
48 * @param ch the character to test
49 * @return <code>true</code> if <code>ch</code> was consumed
51 protected boolean consume(char ch) {
60 * Whether this run accepts that character; does not update state. Called
61 * from the default implementation of <code>consume</code>.
63 * @param ch the character to test
64 * @return <code>true</code> if <code>ch</code> is accepted
66 protected abstract boolean isValid(char ch);
69 * Resets this run to the initial state.
71 protected void init() {
76 static final class Whitespace extends Run {
77 protected boolean isValid(char ch) {
78 return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
82 static final class LineDelimiter extends Run {
83 /** State: INIT -> delimiter -> EXIT. */
85 private static final char INIT= '\0';
86 private static final char EXIT= '\1';
89 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
91 protected void init() {
97 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
99 protected boolean consume(char ch) {
100 if (!isValid(ch) || fState == EXIT)
103 if (fState == INIT) {
107 } else if (fState != ch) {
116 protected boolean isValid(char ch) {
117 return ch == '\n' || ch == '\r';
121 static final class Identifier extends Run {
123 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
125 protected boolean isValid(char ch) {
126 return Scanner.isPHPIdentifierPart(ch);
130 static final class CamelCaseIdentifier extends Run {
132 private static final int S_INIT= 0;
133 private static final int S_LOWER= 1;
134 private static final int S_ONE_CAP= 2;
135 private static final int S_ALL_CAPS= 3;
136 private static final int S_EXIT= 4;
137 private static final int S_EXIT_MINUS_ONE= 5;
139 /* character types */
140 private static final int K_INVALID= 0;
141 private static final int K_LOWER= 1;
142 private static final int K_UPPER= 2;
143 private static final int K_OTHER= 3;
147 private final static int[][] MATRIX= new int[][] {
148 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
149 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
150 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
151 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
152 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
156 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
158 protected void init() {
164 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
166 protected boolean consume(char ch) {
167 int kind= getKind(ch);
168 fState= MATRIX[fState][kind];
177 case S_EXIT_MINUS_ONE:
181 Assert.isTrue(false);
187 * Determines the kind of a character.
189 * @param ch the character to test
191 private int getKind(char ch) {
192 if (Character.isUpperCase(ch))
194 if (Character.isLowerCase(ch))
196 if (Scanner.isPHPIdentifierPart(ch)) // _, digits...
202 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
204 protected boolean isValid(char ch) {
205 return Scanner.isPHPIdentifierPart(ch);
209 static final class Other extends Run {
211 * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
213 protected boolean isValid(char ch) {
214 return !Character.isWhitespace(ch) && !Scanner.isPHPIdentifierPart(ch);
218 private static final Run WHITESPACE= new Whitespace();
219 private static final Run DELIMITER= new LineDelimiter();
220 private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier();
221 private static final Run OTHER= new Other();
223 /** The platform break iterator (word instance) used as a base. */
224 protected final BreakIterator fIterator;
225 /** The text we operate on. */
226 protected CharSequence fText;
227 /** The current position for the stateful methods. */
232 * Creates a new break iterator.
234 public JavaBreakIterator() {
235 fIterator= BreakIterator.getWordInstance();
236 fIndex= fIterator.current();
240 * @see java.text.BreakIterator#current()
242 public int current() {
247 * @see java.text.BreakIterator#first()
250 fIndex= fIterator.first();
255 * @see java.text.BreakIterator#following(int)
257 public int following(int offset) {
258 // work around too eager IAEs in standard impl
259 if (offset == getText().getEndIndex())
262 int next= fIterator.following(offset);
266 // TODO deal with complex script word boundaries
267 // Math.min(offset + run.length, next) does not work
268 // since wordinstance considers _ as boundaries
269 // seems to work fine, however
270 Run run= consumeRun(offset);
271 return offset + run.length;
276 * Consumes a run of characters at the limits of which we introduce a break.
277 * @param offset the offset to start at
278 * @return the run that was consumed
280 private Run consumeRun(int offset) {
281 // assert offset < length
283 char ch= fText.charAt(offset);
284 int length= fText.length();
286 while (run.consume(ch) && offset < length - 1) {
288 ch= fText.charAt(offset);
295 * Returns a run based on a character.
297 * @param ch the character to test
298 * @return the correct run given <code>ch</code>
300 private Run getRun(char ch) {
302 if (WHITESPACE.isValid(ch))
304 else if (DELIMITER.isValid(ch))
306 else if (CAMELCASE.isValid(ch))
308 else if (OTHER.isValid(ch))
311 Assert.isTrue(false);
320 * @see java.text.BreakIterator#getText()
322 public CharacterIterator getText() {
323 return fIterator.getText();
327 * @see java.text.BreakIterator#isBoundary(int)
329 public boolean isBoundary(int offset) {
330 if (offset == getText().getBeginIndex())
333 return following(offset - 1) == offset;
337 * @see java.text.BreakIterator#last()
340 fIndex= fIterator.last();
345 * @see java.text.BreakIterator#next()
348 fIndex= following(fIndex);
353 * @see java.text.BreakIterator#next(int)
355 public int next(int n) {
356 return fIterator.next(n);
360 * @see java.text.BreakIterator#preceding(int)
362 public int preceding(int offset) {
363 if (offset == getText().getBeginIndex())
366 if (isBoundary(offset - 1))
369 int previous= offset - 1;
371 previous= fIterator.preceding(previous);
372 } while (!isBoundary(previous));
375 while (previous < offset) {
377 previous= following(previous);
384 * @see java.text.BreakIterator#previous()
386 public int previous() {
387 fIndex= preceding(fIndex);
392 * @see java.text.BreakIterator#setText(java.lang.String)
394 public void setText(String newText) {
395 setText((CharSequence) newText);
399 * Sets the text to operate on from a char sequence.
400 * @param newText the new text
402 public void setText(CharSequence newText) {
404 fIterator.setText(new SequenceCharacterIterator(newText));
409 * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
411 public void setText(CharacterIterator newText) {
412 if (newText instanceof CharSequence) {
413 fText= (CharSequence) newText;
414 fIterator.setText(newText);
417 throw new UnsupportedOperationException("CharacterIterator not supported"); //$NON-NLS-1$