// blob: ae19839ad6f58d3ed020f0eb615ac6117a08c018 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2020 Thomas Wolf<thomas.wolf@paranor.ch> and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*******************************************************************************/
package org.eclipse.ui.internal.misc;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
import org.eclipse.core.text.StringMatcher;
/**
 * Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern
 * that may contain the wildcards '?' or '*' against a text. However, the
 * matching is not only done on the full text, but also on individual words from
 * the text, and if the pattern contains whitespace, the pattern is split into
 * sub-patterns and those are matched, too.
 * <p>
 * The precise rules are:
 * </p>
 * <ul>
 * <li>If the full pattern matches the full text, the match succeeds.</li>
 * <li>If the full pattern matches a single word of the text, the match
 * succeeds.</li>
 * <li>If the pattern was split into sub-patterns and every sub-pattern matches
 * a prefix of the whole text or a prefix of any word, the match succeeds.</li>
 * <li>Otherwise, the match fails.</li>
 * </ul>
 * <p>
 * An empty pattern matches only the empty text.
 * </p>
 */
public final class TextMatcher {

    /** Separator used by {@link #getWords(String)}: one or more non-word characters (Unicode-aware). */
    private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$

    /** Matcher for the complete, unsplit pattern. */
    private final StringMatcher full;

    /**
     * Prefix matchers for the whitespace-separated sub-patterns; empty if the
     * pattern was not split.
     */
    private final List<StringMatcher> parts;

    /**
     * Creates a new {@link TextMatcher}.
     *
     * @param pattern         to match
     * @param ignoreCase      whether to do case-insensitive matching
     * @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
     *                        as wildcards
     * @throws IllegalArgumentException if {@code pattern == null}
     */
    public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
        full = new StringMatcher(pattern, ignoreCase, ignoreWildCards);
        parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
    }

    /**
     * Splits the pattern at runs of whitespace and creates one prefix matcher per
     * non-empty piece.
     *
     * @param pattern         the full pattern
     * @param ignoreCase      whether the sub-matchers ignore case
     * @param ignoreWildCards whether the sub-matchers treat '?' and '*' literally
     * @return the prefix matchers, or an empty list if the pattern is blank or the
     *         split yields at most one piece
     */
    private static List<StringMatcher> splitPattern(String pattern,
            boolean ignoreCase, boolean ignoreWildCards) {
        if (pattern.trim().isEmpty()) {
            return Collections.emptyList();
        }
        // NOTE(review): the untrimmed pattern is split on purpose to keep the
        // existing behavior. String.split() yields a leading empty piece for
        // leading whitespace but drops trailing empty pieces, so " foo" produces
        // one prefix sub-matcher while "foo " produces none. Confirm whether this
        // asymmetry is intended before changing it.
        String[] subPatterns = pattern.split("\\s+"); //$NON-NLS-1$
        if (subPatterns.length <= 1) {
            return Collections.emptyList();
        }
        List<StringMatcher> matchers = new ArrayList<>(subPatterns.length);
        for (String subPattern : subPatterns) {
            if (subPattern.isEmpty()) {
                // Leading empty piece caused by leading whitespace.
                continue;
            }
            StringMatcher matcher = new StringMatcher(subPattern, ignoreCase, ignoreWildCards);
            matcher.usePrefixMatch();
            matchers.add(matcher);
        }
        return matchers;
    }

    /**
     * Determines whether the given {@code text} matches the pattern according to
     * the rules described in the class documentation.
     *
     * @param text String to match; must not be {@code null}
     * @return {@code true} if the {@code text} matches the pattern; {@code false}
     *         otherwise
     * @throws IllegalArgumentException if {@code text == null}
     */
    public boolean match(String text) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        return match(text, 0, text.length());
    }

    /**
     * Determines whether the given sub-string of {@code text} from {@code start}
     * (inclusive) to {@code end} (exclusive) matches the pattern.
     * <p>
     * Indices outside of the text are clamped to {@code [0, text.length()]}. If
     * the resulting range is inverted (for instance because {@code start} lies
     * beyond the end of the text, or {@code end} is negative), there is nothing
     * to match and the result is {@code false}.
     * </p>
     *
     * @param text  String to match in; must not be {@code null}
     * @param start start index (inclusive) within {@code text} of the sub-string to
     *              match
     * @param end   end index (exclusive) within {@code text} of the sub-string to
     *              match
     * @return {@code true} if the given slice of {@code text} matches the pattern;
     *         {@code false} otherwise
     * @throws IllegalArgumentException if {@code text == null}
     */
    public boolean match(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException();
        }
        int from = Math.max(0, start);
        int to = Math.min(end, text.length());
        // Check AFTER clamping: this also covers start > end, and it keeps the
        // substring() call below from throwing when the clamped range is inverted.
        if (from > to) {
            return false;
        }
        // Rule 1: the full pattern matches the whole slice.
        if (full.match(text, from, to)) {
            return true;
        }
        // Rule 2: the full pattern matches a single word of the slice.
        String[] words = getWords(text.substring(from, to));
        if (matchesAnyWord(full, words)) {
            return true;
        }
        // Rule 3 only applies if the pattern was split into sub-patterns.
        if (parts.isEmpty()) {
            return false;
        }
        for (StringMatcher subMatcher : parts) {
            boolean matched = subMatcher.match(text, from, to) || matchesAnyWord(subMatcher, words);
            if (!matched) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the matcher matches at least one of the given words.
     *
     * @param matcher to apply to each word
     * @param words   candidate words; {@code null} entries are skipped
     * @return {@code true} if any non-null word matches
     */
    private static boolean matchesAnyWord(StringMatcher matcher, String[] words) {
        return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
    }

    /**
     * Splits a given text into words by cutting it at runs of non-word
     * characters.
     *
     * @param text to split
     * @return the words of the text
     */
    public static String[] getWords(String text) {
        // Previous implementations (in the removed StringMatcher) used the ICU
        // BreakIterator to split the text. That worked well, but in 2020 it was decided
        // to drop the dependency to the ICU library due to its size. The JDK
        // BreakIterator splits differently, causing e.g.
        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
        // appears to work well for programming language text, but may give sub-optimal
        // results for natural languages. See also
        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
        return NON_WORD.split(text);
    }

    /**
     * Returns a textual representation consisting of the full matcher followed by
     * the list of sub-pattern matchers.
     */
    @Override
    public String toString() {
        return '[' + full.toString() + ',' + parts + ']';
    }
}