blob: 5ff5a00674d9224752158bc45c184104b3a17529 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2009, 2010 Mia-Software.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Nicolas Bros (Mia-Software) - initial API and implementation
* Nicolas Guyomar (Mia-Software) - Bug 333652 Extension point offering the possibility to declare an EPackage browser
*******************************************************************************/
package org.eclipse.emf.facet.common.core.internal.utils;
import java.util.Vector;
import com.ibm.icu.lang.UCharacter;
/**
* A string pattern matcher. Supports '*' and '?' wildcards.
*
* FIXME yet another duplicated version of StringMatcher (Bug 269424)
*/
public class StringMatcher {
    /** The pattern exactly as it was handed to the constructor. */
    private final String fPattern;
    /** Length of {@link #fPattern}. */
    private final int fLength;
    /** When true the pattern is a plain literal: no wildcards, no escapes. */
    private final boolean fIgnoreWildCards;
    /** When true characters are compared without regard to case. */
    private final boolean fIgnoreCase;
    /** True when the pattern starts with an unescaped '*'. */
    private boolean fHasLeadingStar;
    /** True when the pattern ends with an unescaped '*'. */
    private boolean fHasTrailingStar;
    /** The pattern split at unescaped '*' characters; empty pieces are dropped. */
    private String[] fSegments;
    /**
     * Sum of the lengths of all segments, i.e. the minimum number of
     * characters a text must have to be able to match.
     */
    private int fBound = 0;
    /**
     * Internal stand-in (the NUL character) stored in a segment for every
     * unescaped '?' of the pattern.
     */
    private static final char SINGLE_WILD_CARD = '\u0000';

    /** An immutable half-open range of character indexes in a text. */
    public static class Position {
        private final int start; // inclusive
        private final int end; // exclusive

        /**
         * @param start
         *            first index of the range, inclusive
         * @param end
         *            last index of the range, exclusive
         */
        public Position(final int start, final int end) {
            this.start = start;
            this.end = end;
        }

        /** @return the first index of the range (inclusive) */
        public int getStart() {
            return this.start;
        }

        /** @return the index just past the range (exclusive) */
        public int getEnd() {
            return this.end;
        }
    }

    /**
     * Creates a matcher for a simple pattern which may contain '*' for zero
     * or more characters and '?' for exactly one character.
     *
     * Literal '*', '?' and '\' characters must be escaped with a backslash in
     * the pattern, e.g. "\*" means a literal "*" and "\\" means a literal
     * "\".
     *
     * A backslash in front of any other character is not an escape sequence:
     * both characters are kept literally, e.g. "\a" matches the two
     * characters "\a". A single backslash at the very end of the pattern is
     * kept literally as well.
     *
     * If invoking the StringMatcher with string literals in Java, don't
     * forget that a backslash is itself written as "\\" in source code.
     *
     * @param pattern
     *            the pattern to match text against
     * @param ignoreCase
     *            if true, case is ignored
     * @param ignoreWildCards
     *            if true, wild cards and their escape sequences are ignored
     *            (everything is taken literally).
     * @throws IllegalArgumentException
     *             if <code>pattern</code> is null
     */
    public StringMatcher(final String pattern, final boolean ignoreCase,
            final boolean ignoreWildCards) {
        if (pattern == null) {
            throw new IllegalArgumentException();
        }
        this.fIgnoreCase = ignoreCase;
        this.fIgnoreWildCards = ignoreWildCards;
        this.fPattern = pattern;
        this.fLength = pattern.length();
        if (this.fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Finds the first occurrence of the pattern between <code>start</code>
     * (inclusive) and <code>end</code> (exclusive). The range is clamped to
     * the bounds of the text first.
     *
     * @param text
     *            the String object to search in
     * @param start
     *            the starting index of the search range, inclusive
     * @param end
     *            the ending index of the search range, exclusive
     * @return a <code>StringMatcher.Position</code> object that keeps the
     *         starting (inclusive) and ending (exclusive) positions of the
     *         first occurrence of the pattern in the specified range of the
     *         text; null if not found or if the clamped range is empty. For
     *         an empty pattern an empty position located at the start of the
     *         range is returned; for a pattern made only of '*' the whole
     *         range is returned. Leading and trailing stars are not part of
     *         the reported position: for "*abc*" the position of "abc" is
     *         returned, and for "*??*" in text "abcdf" searched from index 0
     *         the result is (0,2).
     * @throws IllegalArgumentException
     *             if <code>text</code> is null
     */
    public StringMatcher.Position find(final String text, final int start,
            final int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        // clamp the requested range to the text
        int newStart = Math.max(start, 0);
        int newEnd = Math.min(end, text.length());
        if (newEnd < 0 || newStart >= newEnd) {
            return null;
        }
        if (this.fLength == 0) {
            return new Position(newStart, newStart);
        }
        if (this.fIgnoreWildCards) {
            int x = posIn(text, newStart, newEnd);
            if (x < 0) {
                return null;
            }
            return new Position(x, x + this.fLength);
        }
        int segCount = this.fSegments.length;
        if (segCount == 0) {
            // pattern contains only '*'(s)
            return new Position(newStart, newEnd);
        }
        // locate the segments one after the other, left to right
        int curPos = newStart;
        int matchStart = -1;
        int i;
        for (i = 0; i < segCount && curPos < newEnd; ++i) {
            String current = this.fSegments[i];
            int nextMatch = regExpPosIn(text, curPos, newEnd, current);
            if (nextMatch < 0) {
                return null;
            }
            if (i == 0) {
                matchStart = nextMatch;
            }
            curPos = nextMatch + current.length();
        }
        if (i < segCount) {
            // ran out of text before every segment was located
            return null;
        }
        return new Position(matchStart, curPos);
    }

    /**
     * Matches the whole of the given <code>text</code> against the pattern.
     *
     * @return true if matched otherwise false
     * @param text
     *            a String object
     * @throws IllegalArgumentException
     *             if <code>text</code> is null (same contract as
     *             {@link #match(String, int, int)})
     */
    public boolean match(final String text) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Given the starting (inclusive) and the ending (exclusive) positions in
     * the <code>text</code>, determines if that substring matches the
     * pattern as a whole. When wildcards are in use the range is clamped to
     * the bounds of the text.
     *
     * @return true if the specified portion of the text matches the pattern
     * @param text
     *            a String object that contains the substring to match
     * @param start
     *            marks the starting position (inclusive) of the substring
     * @param end
     *            marks the ending index (exclusive) of the substring
     * @throws IllegalArgumentException
     *             if <code>text</code> is null
     */
    public boolean match(final String text, final int start, final int end) {
        if (null == text) {
            throw new IllegalArgumentException();
        }
        if (start > end) {
            return false;
        }
        if (this.fIgnoreWildCards) {
            return (end - start == this.fLength)
                    && this.fPattern.regionMatches(this.fIgnoreCase, 0, text,
                            start, this.fLength);
        }
        int segCount = this.fSegments.length;
        if (segCount == 0 && (this.fHasLeadingStar || this.fHasTrailingStar)) {
            // pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            // an empty range is matched by the empty pattern only
            return this.fLength == 0;
        }
        if (this.fLength == 0) {
            // empty pattern against a non-empty range
            return false;
        }
        int newStart = Math.max(start, 0);
        int newEnd = Math.min(end, text.length());
        // Every segment needs room inside the range. Checking this against
        // the range length (not just against newEnd) also guarantees that the
        // first segment can never be compared past the end of the text.
        if (newEnd - newStart < this.fBound) {
            return false;
        }
        int tCurPos = newStart;
        int i = 0;
        String current = this.fSegments[i];
        /* process first segment: without a leading star it is anchored */
        if (!this.fHasLeadingStar) {
            int segLength = current.length();
            if (!regExpRegionMatches(text, newStart, current, 0, segLength)) {
                return false;
            }
            ++i;
            tCurPos = tCurPos + segLength;
        }
        if ((segCount == 1) && (!this.fHasLeadingStar)
                && (!this.fHasTrailingStar)) {
            // only one segment to match, no '*' specified
            return tCurPos == newEnd;
        }
        /*
         * process the remaining segments; all of them must be found, in
         * order, so the loop only ends early by returning false
         */
        for (; i < segCount; ++i) {
            current = this.fSegments[i];
            int currentMatch;
            if (current.indexOf(StringMatcher.SINGLE_WILD_CARD) < 0) {
                currentMatch = textPosIn(text, tCurPos, newEnd, current);
            } else {
                currentMatch = regExpPosIn(text, tCurPos, newEnd, current);
            }
            if (currentMatch < 0) {
                return false;
            }
            tCurPos = currentMatch + current.length();
        }
        /*
         * process final segment: without a trailing star the last segment
         * must sit at the very end of the range. It is preceded by a '*', so
         * re-anchoring it there is allowed.
         */
        if (!this.fHasTrailingStar && tCurPos != newEnd) {
            int clen = current.length();
            return regExpRegionMatches(text, newEnd - clen, current, 0, clen);
        }
        return true;
    }

    /**
     * Sets up the matcher for a literal pattern. Since wildcards are not
     * being used in this case, the pattern consists of a single segment.
     */
    private void parseNoWildCards() {
        this.fSegments = new String[1];
        this.fSegments[0] = this.fPattern;
        this.fBound = this.fLength;
    }

    /**
     * Parses the given pattern into segments separated by wildcard '*'
     * characters, resolving escape sequences and replacing every unescaped
     * '?' by {@link #SINGLE_WILD_CARD}.
     */
    private void parseWildCards() {
        // the first character can not be escaped, so a plain test is enough
        this.fHasLeadingStar = this.fPattern.startsWith("*"); //$NON-NLS-1$
        Vector<String> temp = new Vector<String>();
        int pos = 0;
        StringBuilder buf = new StringBuilder();
        while (pos < this.fLength) {
            char c = this.fPattern.charAt(pos++);
            switch (c) {
            case '\\':
                if (pos >= this.fLength) {
                    /* lone backslash at the end: keep it literally */
                    buf.append(c);
                } else {
                    char next = this.fPattern.charAt(pos++);
                    /* if it's an escape sequence */
                    if (next == '*' || next == '?' || next == '\\') {
                        buf.append(next);
                    } else {
                        /* not an escape sequence, just insert literally */
                        buf.append(c);
                        buf.append(next);
                    }
                }
                break;
            case '*':
                /*
                 * Escaped stars are consumed by the '\\' case above, so a
                 * star seen here is a real wildcard. Detecting the trailing
                 * star here (instead of peeking at the previous character)
                 * handles patterns ending in an escaped backslash followed
                 * by a star correctly.
                 */
                if (pos == this.fLength) {
                    this.fHasTrailingStar = true;
                }
                if (buf.length() > 0) {
                    /* new segment */
                    temp.addElement(buf.toString());
                    this.fBound += buf.length();
                    buf.setLength(0);
                }
                break;
            case '?':
                /* append special character representing single match wildcard */
                buf.append(StringMatcher.SINGLE_WILD_CARD);
                break;
            default:
                buf.append(c);
            }
        }
        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.addElement(buf.toString());
            this.fBound += buf.length();
        }
        this.fSegments = new String[temp.size()];
        temp.copyInto(this.fSegments);
    }

    /**
     * Searches the text for the whole pattern taken literally.
     *
     * @param text
     *            the string to search in
     * @param start
     *            the starting index in the text for search, inclusive
     * @param end
     *            the stopping point of search, exclusive
     * @return the starting index in the text of the pattern, or -1 if not
     *         found
     */
    protected int posIn(final String text, final int start, final int end) {
        // last index at which the pattern still fits before 'end'
        int max = end - this.fLength;
        if (!this.fIgnoreCase) {
            int i = text.indexOf(this.fPattern, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }
        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, this.fPattern, 0, this.fLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches the text for a segment that may contain single-character
     * wildcards.
     *
     * @param text
     *            the string to search in
     * @param start
     *            the starting index in the text for search, inclusive
     * @param end
     *            the stopping point of search, exclusive
     * @param p
     *            a pattern segment that may contain '?' wildcards (in their
     *            internal representation)
     * @return the starting index in the text of the segment, or -1 if not
     *         found
     */
    protected int regExpPosIn(final String text, final int start,
            final int end, final String p) {
        int plen = p.length();
        // last index at which the segment still fits before 'end'
        int max = end - plen;
        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Compares a region of the text with a region of a pattern segment,
     * character by character. The caller must make sure that both regions
     * lie inside their strings.
     *
     * @return true if the two regions match
     * @param text
     *            a String to match
     * @param tStart
     *            indicates the starting index of match in the text, inclusive
     * @param p
     *            a pattern segment that may contain '?' wildcards (in their
     *            internal representation)
     * @param pStart
     *            the starting index in <code>p</code>, inclusive
     * @param plen
     *            the number of characters to compare
     */
    protected boolean regExpRegionMatches(final String text, final int tStart,
            final String p, final int pStart, final int plen) {
        int curTStart = tStart;
        int curPStart = pStart;
        int curPlen = plen;
        while (curPlen-- > 0) {
            char tchar = text.charAt(curTStart++);
            char pchar = p.charAt(curPStart++);
            /* a single wild card stands for any character */
            if (!this.fIgnoreWildCards
                    && pchar == StringMatcher.SINGLE_WILD_CARD) {
                continue;
            }
            if (pchar == tchar) {
                continue;
            }
            if (this.fIgnoreCase) {
                // java.lang.Character is used for case folding so that this
                // class needs nothing beyond the JDK
                if (Character.toUpperCase(tchar) == Character
                        .toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all
                // cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character
                        .toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Searches the text for a segment that contains no wildcard.
     *
     * @param text
     *            the string to search in
     * @param start
     *            the starting index in the text for search, inclusive
     * @param end
     *            the stopping point of search, exclusive
     * @param p
     *            a string that has no wildcard
     * @return the starting index in the text of the segment, or -1 if not
     *         found
     */
    protected int textPosIn(final String text, final int start, final int end,
            final String p) {
        int plen = p.length();
        // last index at which the segment still fits before 'end'
        int max = end - plen;
        if (!this.fIgnoreCase) {
            int i = text.indexOf(p, start);
            if (i == -1 || i > max) {
                return -1;
            }
            return i;
        }
        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }
}