blob: e51c2ad103b3666ed12d6a06ceda0eb8f88eb279 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2020 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
* Lucas Bullen (Red Hat Inc.) - [Bug 203792] filter should support multiple keywords
* Mickael Istria (Red Hat Inc.) - [534277] erroneous filtering with multiple words
*******************************************************************************/
package org.eclipse.ui.internal.misc;
import java.util.ArrayList;
import java.util.regex.Pattern;
/**
* A string pattern matcher, supporting "*" and "?" wildcards.
*/
public class StringMatcher {
protected String fPattern;
protected int fLength; // pattern length
protected boolean fIgnoreWildCards;
protected boolean fIgnoreCase;
protected String[] patternWords;
protected Word wholePatternWord;
protected Word[] splittedPatternWords;
protected static final char fSingleWildCard = '\u0000';
private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
class Word {
private boolean hasTrailingStar = false;
private boolean hasLeadingStar = false;
private int bound = 0;
private String[] fragments = null;
private final String pattern;
Word(String pattern) {
this.pattern = pattern;
}
public Word(String pattern, int fLength, String[] wordsSplitted) {
this(pattern);
this.bound = fLength;
this.fragments = wordsSplitted;
}
private void parseWildcards() {
if (this.pattern.startsWith("*")) { //$NON-NLS-1$
this.hasLeadingStar = true;
}
if (this.pattern.endsWith("*")) {//$NON-NLS-1$
/* make sure it's not an escaped wildcard */
if (this.pattern.length() > 1 && this.pattern.charAt(this.pattern.length() - 2) != '\\') {
this.hasTrailingStar = true;
}
}
ArrayList<String> temp = new ArrayList<>();
int pos = 0;
StringBuilder buf = new StringBuilder();
while (pos < this.pattern.length()) {
char c = this.pattern.charAt(pos++);
switch (c) {
case '\\':
if (pos >= this.pattern.length()) {
buf.append(c);
} else {
char next = this.pattern.charAt(pos++);
/* if it's an escape sequence */
if (next == '*' || next == '?' || next == '\\') {
buf.append(next);
} else {
/* not an escape sequence, just insert literally */
buf.append(c);
buf.append(next);
}
}
break;
case '*':
if (buf.length() > 0) {
/* new segment */
temp.add(buf.toString());
this.bound += buf.length();
buf.setLength(0);
}
break;
case '?':
/* append special character representing single match wildcard */
buf.append(fSingleWildCard);
break;
default:
buf.append(c);
}
}
/* add last buffer to segment list */
if (buf.length() > 0) {
temp.add(buf.toString());
this.bound += buf.length();
}
this.fragments = temp.toArray(new String[temp.size()]);
}
boolean match(String text, int start, int end) {
boolean found = true;
if (fIgnoreWildCards) {
if ((end - start == this.pattern.length())
&& this.pattern.regionMatches(fIgnoreCase, 0, text, start, this.pattern.length()))
return true;
return false;
}
String[] segments = null;
segments = this.fragments;
int segCount = segments.length;
if (segCount == 0 && (this.hasLeadingStar || this.hasTrailingStar)) {
return true;
}
if (start == end) {
if (this.pattern.length() == 0)
return true;
return false;
}
if (this.pattern.length() == 0) {
if (start == end)
return true;
return false;
}
int tCurPos = start;
int bound = end - this.bound;
if (bound < 0) {
return false;
}
int i = 0;
String current = segments[i];
int segLength = current.length();
/* process first segment */
if (!hasLeadingStar) {
if (!regExpRegionMatches(text, start, current, 0, segLength)) {
return false;
}
++i;
tCurPos = tCurPos + segLength;
}
if ((segments.length == 1) && (!hasLeadingStar) && (!hasTrailingStar)) {
// only one segment to match, no wildcards specified
if (tCurPos == end)
return true;
return false;
}
/* process middle segments */
while (i < segCount && found) {
current = segments[i];
int currentMatch;
int k = current.indexOf(fSingleWildCard);
if (k < 0) {
currentMatch = textPosIn(text, tCurPos, end, current);
if (currentMatch < 0) {
found = false;
}
} else {
currentMatch = regExpPosIn(text, tCurPos, end, current);
if (currentMatch < 0) {
found = false;
}
}
if (!found)
return false;
tCurPos = currentMatch + current.length();
i++;
}
/* process final segment */
if (!hasTrailingStar && tCurPos != end) {
int clen = current.length();
if (regExpRegionMatches(text, end - clen, current, 0, clen))
return true;
return false;
}
if (i == segCount)
return true;
return false;
}
/**
* @param text
* @param start
* @param end
* @return whether the current pattern word matches at least one word in the
* given text
*/
public boolean matchTextWord(String text, int start, int end) {
String[] textWords = getWords(text.substring(start, end));
if (textWords.length == 0) {
return pattern.isEmpty();
}
for (String subword : textWords) {
if (match(subword, 0, subword.length())) {
return true;
}
}
return false;
}
}
/**
* StringMatcher constructor takes in a String object that is a simple pattern
* which may contain '*' for 0 and many characters and '?' for exactly one
* character.
*
* Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
* means literal "*", etc.
*
* Escaping any other character (including the escape character itself), just
* results in that character in the pattern. e.g., "\a" means "a" and "\\" means
* "\"
*
* If invoking the StringMatcher with string literals in Java, don't forget
* escape characters are represented by "\\".
*
* @param pattern the pattern to match text against
* @param ignoreCase if true, case is ignored
* @param ignoreWildCards if true, wild cards and their escape sequences are
* ignored (everything is taken literally).
*/
public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
if (pattern == null) {
throw new IllegalArgumentException();
}
fIgnoreCase = ignoreCase;
fIgnoreWildCards = ignoreWildCards;
fPattern = pattern;
fLength = pattern.length();
parsePatternIntoWords();
if (fIgnoreWildCards) {
parseNoWildCards();
} else {
if (wholePatternWord != null) {
wholePatternWord.parseWildcards();
}
if (splittedPatternWords != null && splittedPatternWords.length > 1) {
for (Word word : splittedPatternWords) {
word.parseWildcards();
}
}
}
}
/**
* match the given <code>text</code> with the pattern
*
* @return true if matched otherwise false
* @param text a String object
*/
public boolean match(String text) {
if (text == null) {
return false;
}
return match(text, 0, text.length());
}
/**
* Given the starting (inclusive) and the ending (exclusive) positions in the
* <code>text</code>, determine if the given substring matches with aPattern
*
* @return true if the specified portion of the text matches the pattern
* @param text a String object that contains the substring to match
* @param start marks the starting position (inclusive) of the substring
* @param end marks the ending index (exclusive) of the substring
*/
public boolean match(String text, int start, int end) {
if (null == text) {
throw new IllegalArgumentException();
}
if (start > end) {
return false;
}
int tlen = text.length();
start = Math.max(0, start);
end = Math.min(end, tlen);
if (wholePatternWord != null
&& (wholePatternWord.match(text, start, end) || wholePatternWord.matchTextWord(text, start, end))) {
return true;
}
if (splittedPatternWords != null && splittedPatternWords.length > 0) {
for (Word word : splittedPatternWords) {
if (!word.match(text, start, end) && !word.matchTextWord(text, start, end)) {
return false;
}
}
return true;
}
return false;
}
/**
* This method parses the given pattern into words separated by spaces
* characters. Since wildcards are not being used in this case, the pattern
* consists of a single segment.
*/
private void parsePatternIntoWords() {
String trimedPattern = fPattern.trim();
if (!trimedPattern.isEmpty()) {
this.wholePatternWord = new Word(trimedPattern);
patternWords = trimedPattern.split("\\s+"); //$NON-NLS-1$
if (patternWords.length > 1) {
this.splittedPatternWords = new Word[patternWords.length];
for (int i = 0; i < patternWords.length; i++) {
String patternWord = patternWords[i];
if (!patternWord.endsWith("*")) { //$NON-NLS-1$
patternWord += '*';
}
this.splittedPatternWords[i] = new Word(patternWord);
// words may be found anywhere in the line
}
}
}
}
/**
* This method parses the given pattern into segments seperated by wildcard '*'
* characters. Since wildcards are not being used in this case, the pattern
* consists of a single segment.
*/
private void parseNoWildCards() {
this.wholePatternWord = new Word(fPattern, fLength, patternWords);
this.wholePatternWord.bound = fLength;
this.wholePatternWord.fragments = patternWords;
}
/**
* @param text a string which contains no wildcard
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int posIn(String text, int start, int end) {// no wild card in pattern
int max = end - fLength;
if (!fIgnoreCase) {
int i = text.indexOf(fPattern, start);
if (i == -1 || i > max) {
return -1;
}
return i;
}
for (int i = start; i <= max; ++i) {
if (text.regionMatches(true, i, fPattern, 0, fLength)) {
return i;
}
}
return -1;
}
/**
* @param text a simple regular expression that may only contain '?'(s)
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @param p a simple regular expression that may contains '?'
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int regExpPosIn(String text, int start, int end, String p) {
int plen = p.length();
int max = end - plen;
for (int i = start; i <= max; ++i) {
if (regExpRegionMatches(text, i, p, 0, plen)) {
return i;
}
}
return -1;
}
/**
*
* @return boolean
* @param text a String to match
* @param start int that indicates the starting index of match, inclusive
* @param end int that indicates the ending index of match, exclusive
* @param p String, String, a simple regular expression that may
* contain '?'
* @param ignoreCase boolean indicating whether <code>p</code> is case sensitive
*/
protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
while (plen-- > 0) {
char tchar = text.charAt(tStart++);
char pchar = p.charAt(pStart++);
/* process wild cards */
if (!fIgnoreWildCards) {
/* skip single wild cards */
if (pchar == fSingleWildCard) {
continue;
}
}
if (pchar == tchar) {
continue;
}
if (fIgnoreCase) {
if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
continue;
}
// comparing after converting to upper case doesn't handle all cases;
// also compare after converting to lower case
if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
continue;
}
}
return false;
}
return true;
}
/**
* @param text the string to match
* @param start the starting index in the text for search, inclusive
* @param end the stopping point of search, exclusive
* @param p a pattern string that has no wildcard
* @return the starting index in the text of the pattern , or -1 if not found
*/
protected int textPosIn(String text, int start, int end, String p) {
int plen = p.length();
int max = end - plen;
if (!fIgnoreCase) {
int i = text.indexOf(p, start);
if (i == -1 || i > max) {
return -1;
}
return i;
}
for (int i = start; i <= max; ++i) {
if (text.regionMatches(true, i, p, 0, plen)) {
return i;
}
}
return -1;
}
/**
* Take the given filter text and break it down into words using a
* BreakIterator.
*
* @param text
* @return an array of words
*/
public static String[] getWords(String text) {
return NON_WORD.split(text, 0);
}
}