1 /*******************************************************************************
 
   2  * Copyright (c) 2000, 2004 IBM Corporation and others.
 
   3  * All rights reserved. This program and the accompanying materials 
 
   4  * are made available under the terms of the Common Public License v1.0
 
   5  * which accompanies this distribution, and is available at
 
   6  * http://www.eclipse.org/legal/cpl-v10.html
 
   9  *     IBM Corporation - initial API and implementation
 
  10  *******************************************************************************/
 
  11 package net.sourceforge.phpdt.internal.ui.text;
 
  13 import java.text.BreakIterator;
 
  14 import java.text.CharacterIterator;
 
  16 import net.sourceforge.phpdt.internal.compiler.parser.Scanner;
 
  18 import org.eclipse.jface.text.Assert;
 
  21  * A Java break iterator. It returns all breaks, including before and after
 
  22  * whitespace, and it returns all camelcase breaks.
 
  24  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
 
  29 public class JavaBreakIterator extends BreakIterator {
 
  32          * A run of common characters.
 
  34         protected static abstract class Run {
 
  35                 /** The length of this run. */
 
  43                  * Returns <code>true</code> if this run consumes <code>ch</code>,
 
  44                  * <code>false</code> otherwise. If <code>true</code> is returned,
 
  45                  * the length of the receiver is adjusted accordingly.
 
  48                  *            the character to test
 
  49                  * @return <code>true</code> if <code>ch</code> was consumed
 
  51                 protected boolean consume(char ch) {
 
  60                  * Whether this run accepts that character; does not update state.
 
  61                  * Called from the default implementation of <code>consume</code>.
 
  64                  *            the character to test
 
  65                  * @return <code>true</code> if <code>ch</code> is accepted
 
  67                 protected abstract boolean isValid(char ch);
 
  70                  * Resets this run to the initial state.
 
  72                 protected void init() {
 
  77         static final class Whitespace extends Run {
 
  78                 protected boolean isValid(char ch) {
 
  79                         return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
 
  83         static final class LineDelimiter extends Run {
 
  84                 /** State: INIT -> delimiter -> EXIT. */
 
  87                 private static final char INIT = '\0';
 
  89                 private static final char EXIT = '\1';
 
  92                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
 
  94                 protected void init() {
 
 100                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
 
 102                 protected boolean consume(char ch) {
 
 103                         if (!isValid(ch) || fState == EXIT)
 
 106                         if (fState == INIT) {
 
 110                         } else if (fState != ch) {
 
 119                 protected boolean isValid(char ch) {
 
 120                         return ch == '\n' || ch == '\r';
 
 124         static final class Identifier extends Run {
 
 126                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
 
 128                 protected boolean isValid(char ch) {
 
 129                         return Scanner.isPHPIdentifierPart(ch);
 
 133         static final class CamelCaseIdentifier extends Run {
 
 135                 private static final int S_INIT = 0;
 
 137                 private static final int S_LOWER = 1;
 
 139                 private static final int S_ONE_CAP = 2;
 
 141                 private static final int S_ALL_CAPS = 3;
 
 143                 private static final int S_EXIT = 4;
 
 145                 private static final int S_EXIT_MINUS_ONE = 5;
 
 147                 /* character types */
 
 148                 private static final int K_INVALID = 0;
 
 150                 private static final int K_LOWER = 1;
 
 152                 private static final int K_UPPER = 2;
 
 154                 private static final int K_OTHER = 3;
 
 158                 private final static int[][] MATRIX = new int[][] {
 
 159                 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
 
 160                                 { S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
 
 161                                 { S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
 
 162                                 { S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
 
 163                                 { S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
 
 167                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#init()
 
 169                 protected void init() {
 
 175                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
 
 177                 protected boolean consume(char ch) {
 
 178                         int kind = getKind(ch);
 
 179                         fState = MATRIX[fState][kind];
 
 188                         case S_EXIT_MINUS_ONE:
 
 192                                 Assert.isTrue(false);
 
 198                  * Determines the kind of a character.
 
 201                  *            the character to test
 
 203                 private int getKind(char ch) {
 
 204                         if (Character.isUpperCase(ch))
 
 206                         if (Character.isLowerCase(ch))
 
 208                         if (Scanner.isPHPIdentifierPart(ch)) // _, digits...
 
 214                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
 
 216                 protected boolean isValid(char ch) {
 
 217                         return Scanner.isPHPIdentifierPart(ch);
 
 221         static final class Other extends Run {
 
 223                  * @see net.sourceforge.phpdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
 
 225                 protected boolean isValid(char ch) {
 
 226                         return !Character.isWhitespace(ch)
 
 227                                         && !Scanner.isPHPIdentifierPart(ch);
 
 231         private static final Run WHITESPACE = new Whitespace();
 
 233         private static final Run DELIMITER = new LineDelimiter();
 
 235         private static final Run CAMELCASE = new CamelCaseIdentifier(); // new
 
 238         private static final Run OTHER = new Other();
 
 240         /** The platform break iterator (word instance) used as a base. */
 
 241         protected final BreakIterator fIterator;
 
 243         /** The text we operate on. */
 
 244         protected CharSequence fText;
 
 246         /** our current position for the stateful methods. */
 
 250          * Creates a new break iterator.
 
 252         public JavaBreakIterator() {
 
 253                 fIterator = BreakIterator.getWordInstance();
 
 254                 fIndex = fIterator.current();
 
 258          * @see java.text.BreakIterator#current()
 
 260         public int current() {
 
 265          * @see java.text.BreakIterator#first()
 
 268                 fIndex = fIterator.first();
 
 273          * @see java.text.BreakIterator#following(int)
 
 275         public int following(int offset) {
 
 276                 // work around too eager IAEs in standard impl
 
 277                 if (offset == getText().getEndIndex())
 
 280                 int next = fIterator.following(offset);
 
 284                 // TODO deal with complex script word boundaries
 
 285                 // Math.min(offset + run.length, next) does not work
 
 286                 // since wordinstance considers _ as boundaries
 
 287                 // seems to work fine, however
 
 288                 Run run = consumeRun(offset);
 
 289                 return offset + run.length;
 
 294          * Consumes a run of characters at the limits of which we introduce a break.
 
 297          *            the offset to start at
 
 298          * @return the run that was consumed
 
 300         private Run consumeRun(int offset) {
 
 301                 // assert offset < length
 
 303                 char ch = fText.charAt(offset);
 
 304                 int length = fText.length();
 
 305                 Run run = getRun(ch);
 
 306                 while (run.consume(ch) && offset < length - 1) {
 
 308                         ch = fText.charAt(offset);
 
 315          * Returns a run based on a character.
 
 318          *            the character to test
 
 319          * @return the correct run given <code>ch</code>
 
 321         private Run getRun(char ch) {
 
 323                 if (WHITESPACE.isValid(ch))
 
 325                 else if (DELIMITER.isValid(ch))
 
 327                 else if (CAMELCASE.isValid(ch))
 
 329                 else if (OTHER.isValid(ch))
 
 332                         Assert.isTrue(false);
 
 341          * @see java.text.BreakIterator#getText()
 
 343         public CharacterIterator getText() {
 
 344                 return fIterator.getText();
 
 348          * @see java.text.BreakIterator#isBoundary(int)
 
 350         public boolean isBoundary(int offset) {
 
 351                 if (offset == getText().getBeginIndex())
 
 354                         return following(offset - 1) == offset;
 
 358          * @see java.text.BreakIterator#last()
 
 361                 fIndex = fIterator.last();
 
 366          * @see java.text.BreakIterator#next()
 
 369                 fIndex = following(fIndex);
 
 374          * @see java.text.BreakIterator#next(int)
 
 376         public int next(int n) {
 
 377                 return fIterator.next(n);
 
 381          * @see java.text.BreakIterator#preceding(int)
 
 383         public int preceding(int offset) {
 
 384                 if (offset == getText().getBeginIndex())
 
 387                 if (isBoundary(offset - 1))
 
 390                 int previous = offset - 1;
 
 392                         previous = fIterator.preceding(previous);
 
 393                 } while (!isBoundary(previous));
 
 396                 while (previous < offset) {
 
 398                         previous = following(previous);
 
 405          * @see java.text.BreakIterator#previous()
 
 407         public int previous() {
 
 408                 fIndex = preceding(fIndex);
 
 413          * @see java.text.BreakIterator#setText(java.lang.String)
 
 415         public void setText(String newText) {
 
 416                 setText((CharSequence) newText);
 
 420          * Sets the text to be scanned to the given char sequence.
 
 425         public void setText(CharSequence newText) {
 
 427                 fIterator.setText(new SequenceCharacterIterator(newText));
 
 432          * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
 
 434         public void setText(CharacterIterator newText) {
 
 435                 if (newText instanceof CharSequence) {
 
 436                         fText = (CharSequence) newText;
 
 437                         fIterator.setText(newText);
 
 440                         throw new UnsupportedOperationException(
 
 441                                         "CharacterIterator not supported"); //$NON-NLS-1$