going crazy...
[phpeclipse.git] / net.sourceforge.phpeclipse.externaltools / src / net / sourceforge / phpdt / externaltools / model / StringMatcher.java
1 package net.sourceforge.phpdt.externaltools.model;
2
3 /**********************************************************************
4  Copyright (c) 2000, 2002 IBM Corp.  All rights reserved.
5  This file is made available under the terms of the Common Public License v1.0
6  which accompanies this distribution, and is available at
7  http://www.eclipse.org/legal/cpl-v10.html
8  **********************************************************************/
9
10 import java.util.Vector;
11
12 /**
13  * Copied from net.sourceforge.phpdt.internal.ui.util.StringMatcher
14  * 
15  * A string pattern matcher, suppporting * and ? wildcards.
16  */
17 public class StringMatcher {
18         protected String fPattern;
19
20         protected int fLength; // pattern length
21
22         protected boolean fIgnoreWildCards;
23
24         protected boolean fIgnoreCase;
25
26         protected boolean fHasLeadingStar;
27
28         protected boolean fHasTrailingStar;
29
30         protected String fSegments[]; // the given pattern is split into *
31                                                                         // separated segments
32
33         /* boundary value beyond which we don't need to search in the text */
34         protected int fBound = 0;
35
36         protected static final char fSingleWildCard = '\u0000';
37
38         public static class Position {
39                 int start; // inclusive
40
41                 int end; // exclusive
42
43                 public Position(int start, int end) {
44                         this.start = start;
45                         this.end = end;
46                 }
47
48                 public int getStart() {
49                         return start;
50                 }
51
52                 public int getEnd() {
53                         return end;
54                 }
55         }
56
57         /**
58          * StringMatcher constructor takes in a String object that is a simple
59          * pattern which may contain �*� for 0 and many characters and �?� for
60          * exactly one character.
61          * 
62          * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
63          * means literal "*", etc.
64          * 
65          * Escaping any other character (including the escape character itself),
66          * just results in that character in the pattern. e.g., "\a" means "a" and
67          * "\\" means "\"
68          * 
69          * If invoking the StringMatcher with string literals in Java, don't forget
70          * escape characters are represented by "\\".
71          * 
72          * @param pattern
73          *            the pattern to match text against
74          * @param ignoreCase
75          *            if true, case is ignored
76          * @param ignoreWildCards
77          *            if true, wild cards and their escape sequences are ignored
78          *            (everything is taken literally).
79          */
80         public StringMatcher(String pattern, boolean ignoreCase,
81                         boolean ignoreWildCards) {
82                 if (pattern == null)
83                         throw new IllegalArgumentException();
84                 fIgnoreCase = ignoreCase;
85                 fIgnoreWildCards = ignoreWildCards;
86                 fPattern = pattern;
87                 fLength = pattern.length();
88
89                 if (fIgnoreWildCards) {
90                         parseNoWildCards();
91                 } else {
92                         parseWildCards();
93                 }
94         }
95
96         /**
97          * Find the first occurrence of the pattern between
98          * <code>start</code)(inclusive)
99          * and <code>end</code>(exclusive).
100          * @param <code>text</code>, the String object to search in
101          * @param <code>start</code>, the starting index of the search range, inclusive
102          * @param <code>end</code>, the ending index of the search range, exclusive
103          * @return an <code>StringMatcher.Position</code> object that keeps the starting
104          * (inclusive) and ending positions (exclusive) of the first occurrence of the
105          * pattern in the specified range of the text; return null if not found or subtext
106          * is empty (start==end). A pair of zeros is returned if pattern is empty string
107          * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
108          * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
109          */
110         public StringMatcher.Position find(String text, int start, int end) {
111                 if (text == null)
112                         throw new IllegalArgumentException();
113
114                 int tlen = text.length();
115                 if (start < 0)
116                         start = 0;
117                 if (end > tlen)
118                         end = tlen;
119                 if (end < 0 || start >= end)
120                         return null;
121                 if (fLength == 0)
122                         return new Position(start, start);
123                 if (fIgnoreWildCards) {
124                         int x = posIn(text, start, end);
125                         if (x < 0)
126                                 return null;
127                         return new Position(x, x + fLength);
128                 }
129
130                 int segCount = fSegments.length;
131                 if (segCount == 0)// pattern contains only '*'(s)
132                         return new Position(start, end);
133
134                 int curPos = start;
135                 int matchStart = -1;
136                 int i;
137                 for (i = 0; i < segCount && curPos < end; ++i) {
138                         String current = fSegments[i];
139                         int nextMatch = regExpPosIn(text, curPos, end, current);
140                         if (nextMatch < 0)
141                                 return null;
142                         if (i == 0)
143                                 matchStart = nextMatch;
144                         curPos = nextMatch + current.length();
145                 }
146                 if (i < segCount)
147                         return null;
148                 return new Position(matchStart, curPos);
149         }
150
151         /**
152          * match the given <code>text</code> with the pattern
153          * 
154          * @return true if matched eitherwise false
155          * @param <code>text</code>, a String object
156          */
157         public boolean match(String text) {
158                 return match(text, 0, text.length());
159         }
160
161         /**
162          * Given the starting (inclusive) and the ending (exclusive) positions in
163          * the <code>text</code>, determine if the given substring matches with
164          * aPattern
165          * 
166          * @return true if the specified portion of the text matches the pattern
167          * @param String
168          *            <code>text</code>, a String object that contains the
169          *            substring to match
170          * @param int
171          *            <code>start<code> marks the starting position (inclusive) of the substring
172          * @param int <code>end<code> marks the ending index (exclusive) of the substring
173          */
174         public boolean match(String text, int start, int end) {
175                 if (null == text)
176                         throw new IllegalArgumentException();
177
178                 if (start > end)
179                         return false;
180
181                 if (fIgnoreWildCards)
182                         return (end - start == fLength)
183                                         && fPattern.regionMatches(fIgnoreCase, 0, text, start,
184                                                         fLength);
185                 int segCount = fSegments.length;
186                 if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern
187                                                                                                                                         // contains
188                                                                                                                                         // only
189                                                                                                                                         // '*'(s)
190                         return true;
191                 if (start == end)
192                         return fLength == 0;
193                 if (fLength == 0)
194                         return start == end;
195
196                 int tlen = text.length();
197                 if (start < 0)
198                         start = 0;
199                 if (end > tlen)
200                         end = tlen;
201
202                 int tCurPos = start;
203                 int bound = end - fBound;
204                 if (bound < 0)
205                         return false;
206                 int i = 0;
207                 String current = fSegments[i];
208                 int segLength = current.length();
209
210                 /* process first segment */
211                 if (!fHasLeadingStar) {
212                         if (!regExpRegionMatches(text, start, current, 0, segLength)) {
213                                 return false;
214                         } else {
215                                 ++i;
216                                 tCurPos = tCurPos + segLength;
217                         }
218                 }
219
220                 /* process middle segments */
221                 while (i < segCount) {
222                         current = fSegments[i];
223                         int currentMatch;
224                         int k = current.indexOf(fSingleWildCard);
225                         if (k < 0) {
226                                 currentMatch = textPosIn(text, tCurPos, end, current);
227                                 if (currentMatch < 0)
228                                         return false;
229                         } else {
230                                 currentMatch = regExpPosIn(text, tCurPos, end, current);
231                                 if (currentMatch < 0)
232                                         return false;
233                         }
234                         tCurPos = currentMatch + current.length();
235                         i++;
236                 }
237
238                 /* process final segment */
239                 if (!fHasTrailingStar && tCurPos != end) {
240                         int clen = current.length();
241                         return regExpRegionMatches(text, end - clen, current, 0, clen);
242                 }
243                 return i == segCount;
244         }
245
246         /**
247          * This method parses the given pattern into segments seperated by wildcard
248          * '*' characters. Since wildcards are not being used in this case, the
249          * pattern consists of a single segment.
250          */
251         private void parseNoWildCards() {
252                 fSegments = new String[1];
253                 fSegments[0] = fPattern;
254                 fBound = fLength;
255         }
256
257         /**
258          * Parses the given pattern into segments seperated by wildcard '*'
259          * characters.
260          * 
261          * @param p,
262          *            a String object that is a simple regular expression with �*�
263          *            and/or �?�
264          */
265         private void parseWildCards() {
266                 if (fPattern.startsWith("*"))//$NON-NLS-1$
267                         fHasLeadingStar = true;
268                 if (fPattern.endsWith("*")) {//$NON-NLS-1$
269                         /* make sure it's not an escaped wildcard */
270                         if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
271                                 fHasTrailingStar = true;
272                         }
273                 }
274
275                 Vector temp = new Vector();
276
277                 int pos = 0;
278                 StringBuffer buf = new StringBuffer();
279                 while (pos < fLength) {
280                         char c = fPattern.charAt(pos++);
281                         switch (c) {
282                         case '\\':
283                                 if (pos >= fLength) {
284                                         buf.append(c);
285                                 } else {
286                                         char next = fPattern.charAt(pos++);
287                                         /* if it's an escape sequence */
288                                         if (next == '*' || next == '?' || next == '\\') {
289                                                 buf.append(next);
290                                         } else {
291                                                 /* not an escape sequence, just insert literally */
292                                                 buf.append(c);
293                                                 buf.append(next);
294                                         }
295                                 }
296                                 break;
297                         case '*':
298                                 if (buf.length() > 0) {
299                                         /* new segment */
300                                         temp.addElement(buf.toString());
301                                         fBound += buf.length();
302                                         buf.setLength(0);
303                                 }
304                                 break;
305                         case '?':
306                                 /* append special character representing single match wildcard */
307                                 buf.append(fSingleWildCard);
308                                 break;
309                         default:
310                                 buf.append(c);
311                         }
312                 }
313
314                 /* add last buffer to segment list */
315                 if (buf.length() > 0) {
316                         temp.addElement(buf.toString());
317                         fBound += buf.length();
318                 }
319
320                 fSegments = new String[temp.size()];
321                 temp.copyInto(fSegments);
322         }
323
324         /**
325          * @param <code>text</code>, a string which contains no wildcard
326          * @param <code>start</code>, the starting index in the text for search,
327          *            inclusive
328          * @param <code>end</code>, the stopping point of search, exclusive
329          * @return the starting index in the text of the pattern , or -1 if not
330          *         found
331          */
332         protected int posIn(String text, int start, int end) {// no wild card in
333                                                                                                                         // pattern
334                 int max = end - fLength;
335
336                 if (!fIgnoreCase) {
337                         int i = text.indexOf(fPattern, start);
338                         if (i == -1 || i > max)
339                                 return -1;
340                         return i;
341                 }
342
343                 for (int i = start; i <= max; ++i) {
344                         if (text.regionMatches(true, i, fPattern, 0, fLength))
345                                 return i;
346                 }
347
348                 return -1;
349         }
350
351         /**
352          * @param <code>text</code>, a simple regular expression that may only
353          *            contain '?'(s)
354          * @param <code>start</code>, the starting index in the text for search,
355          *            inclusive
356          * @param <code>end</code>, the stopping point of search, exclusive
357          * @param <code>p</code>, a simple regular expression that may contains '?'
358          * @param <code>caseIgnored</code>, wether the pattern is not casesensitive
359          * @return the starting index in the text of the pattern , or -1 if not
360          *         found
361          */
362         protected int regExpPosIn(String text, int start, int end, String p) {
363                 int plen = p.length();
364
365                 int max = end - plen;
366                 for (int i = start; i <= max; ++i) {
367                         if (regExpRegionMatches(text, i, p, 0, plen))
368                                 return i;
369                 }
370                 return -1;
371         }
372
373         /**
374          * 
375          * @return boolean
376          * @param <code>text</code>, a String to match
377          * @param <code>start</code>, int that indicates the starting index of
378          *            match, inclusive
379          * @param <code>end</code> int that indicates the ending index of match,
380          *            exclusive
381          * @param <code>p</code>, String, String, a simple regular expression that
382          *            may contain '?'
383          * @param <code>ignoreCase</code>, boolean indicating wether code>p</code>
384          *            is case sensitive
385          */
386         protected boolean regExpRegionMatches(String text, int tStart, String p,
387                         int pStart, int plen) {
388                 while (plen-- > 0) {
389                         char tchar = text.charAt(tStart++);
390                         char pchar = p.charAt(pStart++);
391
392                         /* process wild cards */
393                         if (!fIgnoreWildCards) {
394                                 /* skip single wild cards */
395                                 if (pchar == fSingleWildCard) {
396                                         continue;
397                                 }
398                         }
399                         if (pchar == tchar)
400                                 continue;
401                         if (fIgnoreCase) {
402                                 if (Character.toUpperCase(tchar) == Character
403                                                 .toUpperCase(pchar))
404                                         continue;
405                                 // comparing after converting to upper case doesn't handle all
406                                 // cases;
407                                 // also compare after converting to lower case
408                                 if (Character.toLowerCase(tchar) == Character
409                                                 .toLowerCase(pchar))
410                                         continue;
411                         }
412                         return false;
413                 }
414                 return true;
415         }
416
417         /**
418          * @param <code>text</code>, the string to match
419          * @param <code>start</code>, the starting index in the text for search,
420          *            inclusive
421          * @param <code>end</code>, the stopping point of search, exclusive
422          * @param code>p
423          *            </code>, a string that has no wildcard
424          * @param <code>
425          *            ignoreCase</code>, boolean indicating wether code>p</code>
426          *            is case sensitive
427          * @return the starting index in the text of the pattern , or -1 if not
428          *         found
429          */
430         protected int textPosIn(String text, int start, int end, String p) {
431
432                 int plen = p.length();
433                 int max = end - plen;
434
435                 if (!fIgnoreCase) {
436                         int i = text.indexOf(p, start);
437                         if (i == -1 || i > max)
438                                 return -1;
439                         return i;
440                 }
441
442                 for (int i = start; i <= max; ++i) {
443                         if (text.regionMatches(true, i, p, 0, plen))
444                                 return i;
445                 }
446
447                 return -1;
448         }
449 }