1 package net.sourceforge.phpdt.internal.ui.util;
7 * A string pattern matcher, supporting * and ? wildcards.
9 public class StringMatcher {
10 protected String fPattern;
11 protected int fLength; // pattern length
12 protected boolean fIgnoreWildCards;
13 protected boolean fIgnoreCase;
14 protected boolean fHasLeadingStar;
15 protected boolean fHasTrailingStar;
16 protected String fSegments[]; //the given pattern is split into * separated segments
18 /* boundary value beyond which we don't need to search in the text */
19 protected int fBound= 0;
22 protected static final char fSingleWildCard= '\u0000';
24 public static class Position {
25 int start; //inclusive
27 public Position(int start, int end) {
31 public int getStart() {
39 * StringMatcher constructor takes in a String object that is a simple
40 * pattern which may contain * for zero or more characters and
41 * ? for exactly one character.
43 * Literal '*' and '?' characters must be escaped in the pattern
44 * e.g., "\*" means literal "*", etc.
46 * Escaping any other character (including the escape character itself),
47 * just results in that character in the pattern.
48 * e.g., "\a" means "a" and "\\" means "\"
50 * If invoking the StringMatcher with string literals in Java, don't forget
51 * escape characters are represented by "\\".
53 * @param pattern the pattern to match text against
54 * @param ignoreCase if true, case is ignored
55 * @param ignoreWildCards if true, wild cards and their escape sequences are ignored
56 * (everything is taken literally).
58 public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
60 throw new IllegalArgumentException();
61 fIgnoreCase= ignoreCase;
62 fIgnoreWildCards= ignoreWildCards;
64 fLength= pattern.length();
66 if (fIgnoreWildCards) {
73 * Find the first occurrence of the pattern between <code>start</code> (inclusive)
74 * and <code>end</code> (exclusive).
75 * @param <code>text</code>, the String object to search in
76 * @param <code>start</code>, the starting index of the search range, inclusive
77 * @param <code>end</code>, the ending index of the search range, exclusive
78 * @return a <code>StringMatcher.Position</code> object that keeps the starting
79 * (inclusive) and ending (exclusive) positions of the first occurrence of the
80 * pattern in the specified range of the text; returns null if not found or the subtext
81 * is empty (start==end). A pair of zeros is returned if the pattern is an empty string.
82 * Note that for a pattern like "*abc*" with leading and trailing stars, the position of "abc"
83 * is returned. For a pattern like "*??*" in text "abcdf", (1,3) is returned.
85 public StringMatcher.Position find(String text, int start, int end) {
87 throw new IllegalArgumentException();
89 int tlen= text.length();
94 if (end < 0 ||start >= end )
97 return new Position(start, start);
98 if (fIgnoreWildCards) {
99 int x= posIn(text, start, end);
102 return new Position(x, x+fLength);
105 int segCount= fSegments.length;
106 if (segCount == 0)//pattern contains only '*'(s)
107 return new Position (start, end);
112 for (i= 0; i < segCount && curPos < end; ++i) {
113 String current= fSegments[i];
114 int nextMatch= regExpPosIn(text, curPos, end, current);
118 matchStart= nextMatch;
119 curPos= nextMatch + current.length();
123 return new Position(matchStart, curPos);
126 * Matches the given <code>text</code> against the pattern.
127 * @return true if matched, otherwise false
128 * @param <code>text</code>, a String object
130 public boolean match(String text) {
131 return match(text, 0, text.length());
134 * Given the starting (inclusive) and the ending (exclusive) positions in the
135 * <code>text</code>, determine if the given substring matches the pattern.
136 * @return true if the specified portion of the text matches the pattern
137 * @param String <code>text</code>, a String object that contains the substring to match
138 * @param int <code>start</code> marks the starting position (inclusive) of the substring
139 * @param int <code>end</code> marks the ending index (exclusive) of the substring
141 public boolean match(String text, int start, int end) {
143 throw new IllegalArgumentException();
148 if (fIgnoreWildCards)
149 return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
150 int segCount= fSegments.length;
151 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
158 int tlen= text.length();
165 int bound= end - fBound;
169 String current= fSegments[i];
170 int segLength= current.length();
172 /* process first segment */
173 if (!fHasLeadingStar){
174 if(!regExpRegionMatches(text, start, current, 0, segLength)) {
178 tCurPos= tCurPos + segLength;
182 /* process middle segments */
183 while (i < segCount) {
184 current= fSegments[i];
186 int k= current.indexOf(fSingleWildCard);
188 currentMatch= textPosIn(text, tCurPos, end, current);
189 if (currentMatch < 0)
192 currentMatch= regExpPosIn(text, tCurPos, end, current);
193 if (currentMatch < 0)
196 tCurPos= currentMatch + current.length();
200 /* process final segment */
201 if (!fHasTrailingStar && tCurPos != end) {
202 int clen= current.length();
203 return regExpRegionMatches(text, end - clen, current, 0, clen);
205 return i == segCount ;
208 * This method parses the given pattern into segments separated by wildcard '*' characters.
209 * Since wildcards are not being used in this case, the pattern consists of a single segment.
211 private void parseNoWildCards() {
212 fSegments= new String[1];
213 fSegments[0]= fPattern;
217 * Parses the given pattern into segments separated by wildcard '*' characters.
218 * @param p, a String object that is a simple regular expression with * and/or ?
220 private void parseWildCards() {
221 if(fPattern.startsWith("*"))//$NON-NLS-1$
222 fHasLeadingStar= true;
223 if(fPattern.endsWith("*")) {//$NON-NLS-1$
224 /* make sure it's not an escaped wildcard */
225 if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
226 fHasTrailingStar= true;
230 Vector temp= new Vector();
233 StringBuffer buf= new StringBuffer();
234 while (pos < fLength) {
235 char c= fPattern.charAt(pos++);
238 if (pos >= fLength) {
241 char next= fPattern.charAt(pos++);
242 /* if it's an escape sequence */
243 if (next == '*' || next == '?' || next == '\\') {
246 /* not an escape sequence, just insert literally */
253 if (buf.length() > 0) {
255 temp.addElement(buf.toString());
256 fBound += buf.length();
261 /* append special character representing single match wildcard */
262 buf.append(fSingleWildCard);
269 /* add last buffer to segment list */
270 if (buf.length() > 0) {
271 temp.addElement(buf.toString());
272 fBound += buf.length();
275 fSegments= new String[temp.size()];
276 temp.copyInto(fSegments);
279 * @param <code>text</code>, a string which contains no wildcard
280 * @param <code>start</code>, the starting index in the text for search, inclusive
281 * @param <code>end</code>, the stopping point of search, exclusive
282 * @return the starting index in the text of the pattern , or -1 if not found
284 protected int posIn(String text, int start, int end) {//no wild card in pattern
285 int max= end - fLength;
288 int i= text.indexOf(fPattern, start);
289 if (i == -1 || i > max)
294 for (int i= start; i <= max; ++i) {
295 if (text.regionMatches(true, i, fPattern, 0, fLength))
302 * @param <code>text</code>, a simple regular expression that may only contain '?'(s)
303 * @param <code>start</code>, the starting index in the text for search, inclusive
304 * @param <code>end</code>, the stopping point of search, exclusive
305 * @param <code>p</code>, a simple regular expression that may contain '?'
306 * @param <code>caseIgnored</code>, whether the pattern is case-insensitive
307 * @return the starting index in the text of the pattern , or -1 if not found
309 protected int regExpPosIn(String text, int start, int end, String p) {
310 int plen= p.length();
313 for (int i= start; i <= max; ++i) {
314 if (regExpRegionMatches(text, i, p, 0, plen))
322 * @param <code>text</code>, a String to match
323 * @param <code>start</code>, int that indicates the starting index of match, inclusive
324 * @param <code>end</code> int that indicates the ending index of match, exclusive
325 * @param <code>p</code>, String, a simple regular expression that may contain '?'
326 * @param <code>ignoreCase</code>, boolean indicating whether <code>p</code> is case sensitive
328 protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
330 char tchar= text.charAt(tStart++);
331 char pchar= p.charAt(pStart++);
333 /* process wild cards */
334 if (!fIgnoreWildCards) {
335 /* skip single wild cards */
336 if (pchar == fSingleWildCard) {
343 if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar))
345 // comparing after converting to upper case doesn't handle all cases;
346 // also compare after converting to lower case
347 if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar))
355 * @param <code>text</code>, the string to match
356 * @param <code>start</code>, the starting index in the text for search, inclusive
357 * @param <code>end</code>, the stopping point of search, exclusive
358 * @param <code>p</code>, a string that has no wildcard
359 * @param <code>ignoreCase</code>, boolean indicating whether <code>p</code> is case sensitive
360 * @return the starting index in the text of the pattern , or -1 if not found
362 protected int textPosIn(String text, int start, int end, String p) {
364 int plen= p.length();
368 int i= text.indexOf(p, start);
369 if (i == -1 || i > max)
374 for (int i= start; i <= max; ++i) {
375 if (text.regionMatches(true, i, p, 0, plen))