bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/misc/TextMatcher.java - platform/eclipse.platform.ui - Git at Google

 /*******************************************************************************
  * Copyright (c) 2020 Thomas Wolf<thomas.wolf@paranor.ch> and others.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
  * which accompanies this distribution, and is available at
  * https://www.eclipse.org/legal/epl-2.0/
  *
  * SPDX-License-Identifier: EPL-2.0
  *******************************************************************************/
 package org.eclipse.ui.internal.misc;

 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Objects;
 import java.util.regex.Pattern;
 import org.eclipse.core.text.StringMatcher;

 /**
  * Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern
  * that may contain the wildcards '?' or '*' against a text. However, the
  * matching is not only done on the full text, but also on individual words from
  * the text, and if the pattern contains whitespace, the pattern is split into
  * sub-patterns and those are matched, too.
  * <p>
  * The precise rules are:
  * </p>
  * <ul>
  * <li>If the full pattern matches the full text, the match succeeds.</li>
  * <li>If the full pattern matches a single word of the text, the match
  * succeeds.</li>
  * <li>If all sub-patterns match a prefix of the whole text or any prefix of any
  * word, the match succeeds.</li>
  * <li>Otherwise, the match fails.</li>
  * </ul>
  * <p>
  * An empty pattern matches only the empty text.
  * </p>
  */
 public final class TextMatcher {

 	/**
 	 * Separator used to split a text into words: one or more non-word characters,
 	 * with {@code \W} interpreted according to Unicode character classes.
 	 */
 	private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$

 	/** Matcher built from the complete, unsplit pattern. */
 	private final StringMatcher full;

 	/**
 	 * Matchers for the whitespace-separated sub-patterns, each switched to prefix
 	 * matching; empty if the pattern does not split into more than one piece.
 	 */
 	private final List<StringMatcher> parts;

 	/**
 	 * Creates a new {@link TextMatcher}.
 	 *
 	 * @param pattern         to match
 	 * @param ignoreCase      whether to do case-insensitive matching
 	 * @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
 	 *                        as wildcards
 	 * @throws IllegalArgumentException if {@code pattern == null}
 	 */
 	public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
 		full = new StringMatcher(pattern, ignoreCase, ignoreWildCards);
 		parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
 	}

 	/**
 	 * Splits the pattern at runs of whitespace and creates one prefix matcher per
 	 * non-empty piece.
 	 *
 	 * @param pattern         the complete pattern
 	 * @param ignoreCase      passed on to each sub-pattern matcher
 	 * @param ignoreWildCards passed on to each sub-pattern matcher
 	 * @return the sub-pattern matchers; an empty list if the pattern is blank or
 	 *         splitting it yields at most one piece
 	 */
 	private static List<StringMatcher> splitPattern(String pattern,
 			boolean ignoreCase, boolean ignoreWildCards) {
 		if (pattern.trim().isEmpty()) {
 			return Collections.emptyList();
 		}
 		// Note: the untrimmed pattern is split. Leading whitespace therefore yields
 		// an empty first piece, which counts for the length check below but is
 		// skipped when the matchers are created.
 		String[] subPatterns = pattern.split("\\s+"); //$NON-NLS-1$
 		if (subPatterns.length <= 1) {
 			return Collections.emptyList();
 		}
 		List<StringMatcher> matchers = new ArrayList<>();
 		for (String subPattern : subPatterns) {
 			if (!subPattern.isEmpty()) {
 				StringMatcher m = new StringMatcher(subPattern, ignoreCase, ignoreWildCards);
 				m.usePrefixMatch();
 				matchers.add(m);
 			}
 		}
 		return matchers;
 	}

 	/**
 	 * Determines whether the given {@code text} matches the pattern.
 	 *
 	 * @param text String to match; must not be {@code null}
 	 * @return {@code true} if the full pattern matches the whole {@code text} or
 	 *         one of its words, or if every sub-pattern matches the text or one
 	 *         of its words; {@code false} otherwise
 	 * @throws IllegalArgumentException if {@code text == null}
 	 */
 	public boolean match(String text) {
 		if (text == null) {
 			throw new IllegalArgumentException();
 		}
 		return match(text, 0, text.length());
 	}

 	/**
 	 * Determines whether the given sub-string of {@code text} from {@code start}
 	 * (inclusive) to {@code end} (exclusive) matches the pattern. Indices outside
 	 * the text are clamped to {@code [0, text.length()]}; a slice that lies
 	 * entirely outside the text is not split into words and not matched against
 	 * the sub-patterns.
 	 *
 	 * @param text  String to match in; must not be {@code null}
 	 * @param start start index (inclusive) within {@code text} of the sub-string to
 	 *              match
 	 * @param end   end index (exclusive) within {@code text} of the sub-string to
 	 *              match
 	 * @return {@code true} if the given slice of {@code text} matches the pattern;
 	 *         {@code false} otherwise
 	 * @throws IllegalArgumentException if {@code text == null}
 	 */
 	public boolean match(String text, int start, int end) {
 		if (text == null) {
 			throw new IllegalArgumentException();
 		}
 		if (start > end) {
 			return false;
 		}
 		int from = Math.max(0, start);
 		int to = Math.min(end, text.length());
 		// Rule 1: the full pattern matches the whole slice.
 		if (full.match(text, from, to)) {
 			return true;
 		}
 		if (from > to) {
 			// Clamping inverted the range, i.e. the requested slice lies entirely
 			// before or after the text. substring() would throw for such a range,
 			// and there are no words left to examine.
 			return false;
 		}
 		String[] words = getWords(text.substring(from, to));
 		// Rule 2: the full pattern matches a single word of the slice.
 		if (match(full, words)) {
 			return true;
 		}
 		// Rule 3: every sub-pattern matches the slice or at least one word.
 		if (parts.isEmpty()) {
 			return false;
 		}
 		for (StringMatcher subMatcher : parts) {
 			boolean found = subMatcher.match(text, from, to) || match(subMatcher, words);
 			if (!found) {
 				return false;
 			}
 		}
 		return true;
 	}

 	/**
 	 * Tells whether the {@code matcher} matches at least one of the {@code words}.
 	 *
 	 * @param matcher to apply to each word
 	 * @param words   candidates; {@code null} elements are ignored
 	 * @return {@code true} if any word matches; {@code false} otherwise
 	 */
 	private boolean match(StringMatcher matcher, String[] words) {
 		return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
 	}

 	/**
 	 * Splits a given text into words, using runs of non-word characters as
 	 * separators. If the text starts with a separator, the first element of the
 	 * result is the empty string.
 	 *
 	 * @param text to split
 	 * @return the words of the text
 	 */
 	public static String[] getWords(String text) {
 		// Previous implementations (in the removed StringMatcher) used the ICU
 		// BreakIterator to split the text. That worked well, but in 2020 it was decided
 		// to drop the dependency to the ICU library due to its size. The JDK
 		// BreakIterator splits differently, causing e.g.
 		// https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
 		// appears to work well for programming language text, but may give sub-optimal
 		// results for natural languages. See also
 		// https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
 		return NON_WORD.split(text);
 	}

 	/**
 	 * Returns a textual representation consisting of the full matcher and the
 	 * list of sub-pattern matchers.
 	 */
 	@Override
 	public String toString() {
 		return '[' + full.toString() + ',' + parts + ']';
 	}
 }
	/*******************************************************************************
	* Copyright (c) 2020 Thomas Wolf<thomas.wolf@paranor.ch> and others.
	*
	* This program and the accompanying materials
	* are made available under the terms of the Eclipse Public License 2.0
	* which accompanies this distribution, and is available at
	* https://www.eclipse.org/legal/epl-2.0/
	*
	* SPDX-License-Identifier: EPL-2.0
	*******************************************************************************/
	package org.eclipse.ui.internal.misc;

	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.Collections;
	import java.util.List;
	import java.util.Objects;
	import java.util.regex.Pattern;
	import org.eclipse.core.text.StringMatcher;

	/**
	* Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern
	* that may contain the wildcards '?' or '*' against a text. However, the
	* matching is not only done on the full text, but also on individual words from
	* the text, and if the pattern contains whitespace, the pattern is split into
	* sub-patterns and those are matched, too.
	* <p>
	* The precise rules are:
	* </p>
	* <ul>
	* <li>If the full pattern matches the full text, the match succeeds.</li>
	* <li>If the full pattern matches a single word of the text, the match
	* succeeds.</li>
	* <li>If all sub-patterns match a prefix of the whole text or any prefix of any
	* word, the match succeeds.</li>
	* <li>Otherwise, the match fails.</li>
	* </ul>
	* <p>
	* An empty pattern matches only the empty text.
	* </p>
	*/
	public final class TextMatcher {

	private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$

	private final StringMatcher full;

	private final List<StringMatcher> parts;

	/**
	* Creates a new {@link TextMatcher}.
	*
	* @param pattern to match
	* @param ignoreCase whether to do case-insensitive matching
	* @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
	* as wildcards
	* @throws IllegalArgumentException if {@code pattern == null}
	*/
	public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
	full = new StringMatcher(pattern, ignoreCase, ignoreWildCards);
	parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
	}

	private List<StringMatcher> splitPattern(String pattern,
	boolean ignoreCase, boolean ignoreWildCards) {
	String pat = pattern.trim();
	if (pat.isEmpty()) {
	return Collections.emptyList();
	}
	String[] subPatterns = pattern.split("\\s+"); //$NON-NLS-1$
	if (subPatterns.length <= 1) {
	return Collections.emptyList();
	}
	List<StringMatcher> matchers = new ArrayList<>();
	for (String s : subPatterns) {
	if (s == null \|\| s.isEmpty()) {
	continue;
	}
	StringMatcher m = new StringMatcher(s, ignoreCase, ignoreWildCards);
	m.usePrefixMatch();
	matchers.add(m);
	}
	return matchers;
	}

	/**
	* Determines whether the given {@code text} matches the pattern.
	*
	* @param text String to match; must not be {@code null}
	* @return {@code true} if the whole {@code text} matches the pattern;
	* {@code false} otherwise
	* @throws IllegalArgumentException if {@code text == null}
	*/
	public boolean match(String text) {
	if (text == null) {
	throw new IllegalArgumentException();
	}
	return match(text, 0, text.length());
	}

	/**
	* Determines whether the given sub-string of {@code text} from {@code start}
	* (inclusive) to {@code end} (exclusive) matches the pattern.
	*
	* @param text String to match in; must not be {@code null}
	* @param start start index (inclusive) within {@code text} of the sub-string to
	* match
	* @param end end index (exclusive) within {@code text} of the sub-string to
	* match
	* @return {@code true} if the given slice of {@code text} matches the pattern;
	* {@code false} otherwise
	* @throws IllegalArgumentException if {@code text == null}
	*/
	public boolean match(String text, int start, int end) {
	if (text == null) {
	throw new IllegalArgumentException();
	}
	if (start > end) {
	return false;
	}
	int tlen = text.length();
	start = Math.max(0, start);
	end = Math.min(end, tlen);
	if (full.match(text, start, end)) {
	return true;
	}
	String[] words = getWords(text.substring(start, end));
	if (match(full, words)) {
	return true;
	}
	if (parts.isEmpty()) {
	return false;
	}
	for (StringMatcher subMatcher : parts) {
	if (!subMatcher.match(text, start, end) && !match(subMatcher, words)) {
	return false;
	}
	}
	return true;
	}

	private boolean match(StringMatcher matcher, String[] words) {
	return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
	}

	/**
	* Splits a given text into words.
	*
	* @param text to split
	* @return the words of the text
	*/
	public static String[] getWords(String text) {
	// Previous implementations (in the removed StringMatcher) used the ICU
	// BreakIterator to split the text. That worked well, but in 2020 it was decided
	// to drop the dependency to the ICU library due to its size. The JDK
	// BreakIterator splits differently, causing e.g.
	// https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
	// appears to work well for programming language text, but may give sub-optimal
	// results for natural languages. See also
	// https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
	return NON_WORD.split(text);
	}

	@Override
	public String toString() {
	return '[' + full.toString() + ',' + parts + ']';
	}
	}