blob: ddd548481535f8f8ef89a1780f80abc16510026c [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2006 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Matt McCutchen (hashproduct+eclipse@gmail.com) - Bug 132260 Eclipse doesn't understand negated character classes in .cvsignore
*******************************************************************************/
package org.eclipse.team.internal.ccvs.core.util;
import java.util.Vector;
import java.util.HashMap;
/**
* A StringMatcher contains a glob and matches it against strings.
* StringMatcher supports * and ? wildcards and character classes, possibly
* negated by !, that contain single characters and/or ranges.
* Note: code copied from org.eclipse.jdt.internal.core.util.StringMatcher on April 3, 2001
* (version 0.1 - 010901H18 [rename jbl]).
*/
public class StringMatcher {
protected static class CharacterClass {
final boolean isNegated;
final String text;
CharacterClass(boolean isNegated, String text) {
this.isNegated = isNegated;
this.text = text;
}
boolean listed(char c) {
for (int i = 0; i < text.length(); ) {
if (i + 2 < text.length() && text.charAt(i + 1) == '-') {
if (c >= text.charAt(i) && c <= text.charAt(i + 2))
return true;
i += 3;
} else {
if (c == text.charAt(i))
return true;
i++;
}
}
return false;
}
boolean match(char c) {
return listed(c) ^ isNegated;
}
}
protected String fPattern;
protected int fLength; // pattern length
protected boolean fIgnoreWildCards;
protected boolean fIgnoreCase;
protected boolean fHasLeadingStar;
protected boolean fHasTrailingStar;
protected String fSegments[]; //the given pattern is split into * separated segments
protected HashMap/*<Integer, CharacterClass>*/ fCharacterClassMaps[];
/* boundary value beyond which we don't need to search in the text */
protected int fBound = 0;
/** \? in pattern becomes ? in fSegments, while ? in pattern becomes this */
protected static final char fSingleWildCard = '\u0000';
public static class Position {
int start; //inclusive
int end; //exclusive
public Position(int start, int end) {
this.start = start;
this.end = end;
}
public int getStart() {
return start;
}
public int getEnd() {
return end;
}
}
/**
* Find the first occurrence of the pattern between <code>start</code> (inclusive)
* and <code>end</code> (exclusive).
* @param text the String object to search in
* @param start the starting index of the search range, inclusive
* @param end the ending index of the search range, exclusive
* @return a <code>StringMatcher.Position</code> object that keeps the starting
* (inclusive) and ending positions (exclusive) of the first occurrence of the
* pattern in the specified range of the text; return null if not found or subtext
* is empty (start==end). A pair of zeros is returned if pattern is empty string
* Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
* is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
*/
public StringMatcher.Position find(String text, int start, int end) {
if (fPattern == null|| text == null)
throw new IllegalArgumentException();
int tlen = text.length();
if (start < 0)
start = 0;
if (end > tlen)
end = tlen;
if (end < 0 ||start >= end )
return null;
if (fLength == 0)
return new Position(start, start);
if (fIgnoreWildCards) {
int x = posIn(text, start, end);
if (x < 0)
return null;
return new Position(x, x+fLength);
}
int segCount = fSegments.length;
if (segCount == 0)//pattern contains only '*'(s)
return new Position (start, end);
int curPos = start;
int matchStart = -1;
int i;
for (i = 0; i < segCount && curPos < end; ++i) {
String current = fSegments[i];
int nextMatch = regExpPosIn(text, curPos, end, current, fCharacterClassMaps[i]);
if (nextMatch < 0 )
return null;
if(i == 0)
matchStart = nextMatch;
curPos = nextMatch + current.length();
}
if (i < segCount)
return null;
return new Position(matchStart, curPos);
}
/**
* Constructs a StringMatcher that matches strings against the glob
* <code>aPattern</code>.
*
* <code>aPattern</code> may contain "?"s, which match single characters,
* "*"s, which match zero or more characters, and character classes in
* "[...]". All characters other than "*", "?", and "[" match themselves,
* except for "\", which escapes the following character. For example,
* "\*" matches "*" and "\a" matches "a", while "\\" matches "\". Remember
* that Java string literals have an additional level of escaping, so a
* string literal for a glob matching a single backslash is written "\\\\".
*
* "[" begins a character class, which may contain characters and/or ranges;
* "]" ends the class. A character class matches any single character in
* it; for example, "[ac-e]" matches an "a", a "c", a "d", or an "e". A
* negated character class begins with "[!" and matches any single character
* not listed. Inside a character class, "\" loses its special meaning as
* an escape character. The fancier POSIX requirements for character
* classes are not supported: ranges use Unicode character numbers, and
* (for example) [:alpha:], [.ch.], and [=a=] are not recognized.
*
* @param aPattern the glob to match text with
* @param ignoreCase if true, case is ignored
* @param ignoreWildCards if true, the pattern is taken literally instead of
* as a glob
*/
public StringMatcher(String aPattern, boolean ignoreCase, boolean ignoreWildCards) {
fIgnoreCase = ignoreCase;
fIgnoreWildCards = ignoreWildCards;
fLength = aPattern.length();
/* convert case */
if (fIgnoreCase) {
fPattern = aPattern.toUpperCase();
} else {
fPattern = aPattern;
}
if (fIgnoreWildCards) {
parseNoWildCards();
} else {
parseWildCards();
}
}
/**
* Given the starting (inclusive) and the ending (exclusive) poisitions in the
* <code>text</code>, determine if the given substring matches with aPattern
* @return true if the specified portion of the text matches the pattern
* @param text a String object that contains the substring to match
* @param start marks the starting position (inclusive) of the substring
* @param end marks the ending index (exclusive) of the substring
*/
public boolean match(String text, int start, int end) {
if (null == text)
throw new IllegalArgumentException();
if (start > end)
return false;
if (fIgnoreWildCards)
return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
int segCount= fSegments.length;
if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) // pattern contains only '*'(s)
return true;
if (start == end)
return fLength == 0;
if (fLength == 0)
return start == end;
int tlen= text.length();
if (start < 0)
start= 0;
if (end > tlen)
end= tlen;
int tCurPos= start;
int bound= end - fBound;
if ( bound < 0)
return false;
int i=0;
String current= fSegments[i];
int segLength= current.length();
/* process first segment */
if (!fHasLeadingStar){
if(!regExpRegionMatches(text, start, current, 0, segLength, fCharacterClassMaps[i])) {
return false;
} else {
++i;
tCurPos= tCurPos + segLength;
}
}
if ((fSegments.length == 1) && (!fHasLeadingStar) && (!fHasTrailingStar)) {
// only one segment to match, no wildcards specified
return tCurPos == end;
}
/* process middle segments */
while (i < segCount) {
current= fSegments[i];
int currentMatch;
int k= current.indexOf(fSingleWildCard);
if (k < 0) {
currentMatch= textPosIn(text, tCurPos, end, current);
if (currentMatch < 0)
return false;
} else {
currentMatch= regExpPosIn(text, tCurPos, end, current, fCharacterClassMaps[i]);
if (currentMatch < 0)
return false;
}
tCurPos= currentMatch + current.length();
i++;
}
/* process final segment */
if (!fHasTrailingStar && tCurPos != end) {
int clen= current.length();
return regExpRegionMatches(text, end - clen, current, 0, clen, fCharacterClassMaps[i]);
}
return i == segCount ;
}
/**
* match the given <code>text</code> with the pattern
* @return true if matched eitherwise false
* @param text the String object to match against the pattern
*/
public boolean match(String text) {
return match(text, 0, text.length());
}
/**
* This method parses the given pattern into segments separated by wildcard '*' characters.
* Since wildcards are not being used in this case, the pattern consists of a single segment.
*/
private void parseNoWildCards() {
fSegments = new String[1];
fSegments[0] = fPattern;
fBound = fLength;
}
/**
* This method parses the given pattern into segments separated by wildcard '*' characters.
* @param p a String object that is a simple regular expression with *  and/or ? 
*/
private void parseWildCards() {
if(fPattern.startsWith("*"))//$NON-NLS-1$
fHasLeadingStar = true;
Vector temp = new Vector();
HashMap/*<Integer, CharacterClass>*/ segmentCCs = null;
Vector/*<HashMap<Integer, CharacterClass>>*/ allCCs = new Vector();
int pos = 0;
StringBuffer buf = new StringBuffer();
while (pos < fLength) {
char c = fPattern.charAt(pos++);
fHasTrailingStar = false;
switch (c) {
case '\\':
if (pos >= fLength) {
buf.append(c);
} else {
c = fPattern.charAt(pos++);
buf.append(c);
}
break;
case '*':
fHasTrailingStar = true;
if (buf.length() > 0) {
/* new segment */
temp.addElement(buf.toString());
allCCs.addElement(segmentCCs);
fBound += buf.length();
buf.setLength(0);
segmentCCs = null;
}
break;
case '[':
if (segmentCCs == null)
segmentCCs = new HashMap/*<Integer, CharacterClass>*/();
if (pos >= fLength) {
// Unterminated; take [ literally for lack of anything better to do
buf.append(c);
break;
}
boolean negated = (fPattern.charAt(pos) == '!');
int beginPos = (negated ? pos + 1 : pos);
int endPos = fPattern.indexOf(']', beginPos + 1);
if (endPos == -1) {
// Unterminated; take [ literally for lack of anything better to do
buf.append(c);
break;
}
CharacterClass cc = new CharacterClass(negated, fPattern.substring(beginPos, endPos));
segmentCCs.put(new Integer(buf.length()), cc);
pos = endPos + 1;
/* fall through; fSingleWildCard can also represent a character class */
case '?':
/* append special character representing single match wildcard */
buf.append(fSingleWildCard);
break;
default:
buf.append(c);
}
}
/* add last buffer to segment list */
if (buf.length() > 0) {
temp.addElement(buf.toString());
allCCs.addElement(segmentCCs);
fBound += buf.length();
}
fSegments = new String[temp.size()];
temp.copyInto(fSegments);
fCharacterClassMaps = new HashMap[allCCs.size()];
allCCs.copyInto(fCharacterClassMaps);
}
/**
* @param text a string which contains no wildcard
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int posIn(String text, int start, int end) {//no wild card in pattern
return textPosIn(text, start, end, fPattern);
}
/**
* @param text a simple regular expression that may only contain '?'(s)
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @param p a simple regular expression that may contains '?'
* @param caseIgnored whether the pattern is not casesensitive
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int regExpPosIn(String text, int start, int end, String p, HashMap/*<Integer, CharacterClass>*/ ccMap) {
int plen = p.length();
int max = end - plen;
for (int i = start; i <= max; ++i) {
if (regExpRegionMatches(text, i, p, 0, plen, ccMap))
return i;
}
return -1;
}
/**
*
* @return boolean
* @param text a String to match
* @param start int that indicates the starting index of match, inclusive
* @param end int that indicates the ending index of match, exclusive
* @param p String, a simple regular expression that may contain '?'
* @param ignoreCase boolean indicating whether <code>p</code> is case sensitive
* @param ccMap maps each index of p at which fSingleWildCard occurs (as an
* Integer) to CharacterClass data, or null for a plain old ?
*/
protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen, HashMap/*<Integer, CharacterClass>*/ ccMap) {
for (int ppos = 0; plen-- > 0; ppos++) {
char tchar = text.charAt(tStart++);
char pchar = p.charAt(pStart++);
/* process wild cards */
if (!fIgnoreWildCards) {
/* skip single wild cards */
if (pchar == fSingleWildCard) {
if (ccMap == null)
continue;
CharacterClass cc = (CharacterClass) ccMap.get(new Integer(ppos));
if (cc == null || cc.match(tchar))
continue;
else
return false;
}
}
if (pchar == tchar)
continue;
if (fIgnoreCase) {
char tc = Character.toUpperCase(tchar);
if (tc == pchar)
continue;
}
return false;
}
return true;
}
/**
* @param text the string to match
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @param p a string that has no wildcard
* @param ignoreCase boolean indicating whether p is case sensitive
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int textPosIn(String text, int start, int end, String p) {
int plen = p.length();
int max = end - plen;
if (!fIgnoreCase) {
int i = text.indexOf(p, start);
if (i == -1 || i > max)
return -1;
return i;
}
for (int i = start; i <= max; ++i) {
if (text.regionMatches(true, i, p, 0, plen))
return i;
}
return -1;
}
}