Bug 563485 - Refactor StringMatcher into TextMatcher

Re-implement the matcher using org.eclipse.core.text.StringMatcher
and rename it to TextMatcher to avoid confusions. Fix the prefix
matching on individual words if wildcards are to be taken literally.

Add tests for the new TextMatcher.

The old StringMatcher is kept because it is used (despite being
internal!) in org.eclipse.equinox.p2.ui.discovery. Once it'll have
been replaced there by TextMatcher, StringMatcher can be removed.

Change-Id: Ib5bcb0617a6c894d0b0b2bff25d1d4ab279210a7
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
diff --git a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/FilteredList.java b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/FilteredList.java
index d61fd41..d1dbc1b 100644
--- a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/FilteredList.java
+++ b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/FilteredList.java
@@ -36,7 +36,7 @@
 import org.eclipse.swt.widgets.Table;
 import org.eclipse.swt.widgets.TableItem;
 import org.eclipse.ui.internal.WorkbenchMessages;
-import org.eclipse.ui.internal.misc.StringMatcher;
+import org.eclipse.ui.internal.misc.TextMatcher;
 import org.eclipse.ui.internal.util.Util;
 import org.eclipse.ui.progress.WorkbenchJob;
 
@@ -73,11 +73,11 @@
 	}
 
 	private class DefaultFilterMatcher implements FilterMatcher {
-		private StringMatcher fMatcher;
+		private TextMatcher fMatcher;
 
 		@Override
 		public void setFilter(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
-			fMatcher = new StringMatcher(pattern + '*', ignoreCase, ignoreWildCards);
+			fMatcher = new TextMatcher(pattern + '*', ignoreCase, ignoreWildCards);
 		}
 
 		@Override
diff --git a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/PatternFilter.java b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/PatternFilter.java
index 4e1c20d..62edbd1 100644
--- a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/PatternFilter.java
+++ b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/PatternFilter.java
@@ -23,7 +23,7 @@
 import org.eclipse.jface.viewers.ITreeContentProvider;
 import org.eclipse.jface.viewers.Viewer;
 import org.eclipse.jface.viewers.ViewerFilter;
-import org.eclipse.ui.internal.misc.StringMatcher;
+import org.eclipse.ui.internal.misc.TextMatcher;
 
 /**
  * A filter used in conjunction with <code>FilteredTree</code>. In order to
@@ -57,7 +57,7 @@
 	/**
 	 * The string pattern matcher used for this pattern filter.
 	 */
-	private StringMatcher matcher;
+	private TextMatcher matcher;
 
 	private boolean useEarlyReturnIfMatcherIsNull = true;
 
@@ -175,7 +175,7 @@
 			if (includeLeadingWildcard) {
 				pattern = "*" + pattern; //$NON-NLS-1$
 			}
-			matcher = new StringMatcher(pattern, true, false);
+			matcher = new TextMatcher(pattern, true, false);
 		}
 	}
 
@@ -291,7 +291,7 @@
 		}
 
 		// Otherwise check if any of the words of the text matches
-		String[] words = StringMatcher.getWords(text);
+		String[] words = TextMatcher.getWords(text);
 		for (String word : words) {
 			if (!match(word)) {
 				return false;
diff --git a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/SearchPattern.java b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/SearchPattern.java
index c571193..7f25b42 100644
--- a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/SearchPattern.java
+++ b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/dialogs/SearchPattern.java
@@ -14,7 +14,7 @@
 package org.eclipse.ui.dialogs;
 
 import org.eclipse.jface.util.Util;
-import org.eclipse.ui.internal.misc.StringMatcher;
+import org.eclipse.ui.internal.misc.TextMatcher;
 
 /**
  * A search pattern defines how search results are found.
@@ -95,7 +95,7 @@
 
 	private String initialPattern;
 
-	private StringMatcher stringMatcher;
+	private TextMatcher stringMatcher;
 
 	private static final char END_SYMBOL = '<';
 
@@ -158,7 +158,7 @@
 		initializePatternAndMatchRule(stringPattern);
 		matchRule = matchRule & this.allowedRules;
 		if (matchRule == RULE_PATTERN_MATCH) {
-			stringMatcher = new StringMatcher(this.stringPattern, true, false);
+			stringMatcher = new TextMatcher(this.stringPattern, true, false);
 		}
 	}
 
diff --git a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/about/AboutPluginsPage.java b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/about/AboutPluginsPage.java
index efe040d..1f8a5cf 100644
--- a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/about/AboutPluginsPage.java
+++ b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/about/AboutPluginsPage.java
@@ -72,7 +72,7 @@
 import org.eclipse.ui.internal.WorkbenchMessages;
 import org.eclipse.ui.internal.WorkbenchPlugin;
 import org.eclipse.ui.internal.misc.StatusUtil;
-import org.eclipse.ui.internal.misc.StringMatcher;
+import org.eclipse.ui.internal.misc.TextMatcher;
 import org.eclipse.ui.internal.util.BundleUtility;
 import org.eclipse.ui.progress.WorkbenchJob;
 import org.eclipse.ui.statushandlers.StatusManager;
@@ -674,14 +674,14 @@
 
 class BundlePatternFilter extends ViewerFilter {
 
-	private StringMatcher matcher;
+	private TextMatcher matcher;
 
 	public void setPattern(String searchPattern) {
 		if (searchPattern == null || searchPattern.length() == 0) {
 			this.matcher = null;
 		} else {
 			String pattern = "*" + searchPattern + "*"; //$NON-NLS-1$//$NON-NLS-2$
-			this.matcher = new StringMatcher(pattern, true, false);
+			this.matcher = new TextMatcher(pattern, true, false);
 		}
 	}
 
diff --git a/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/misc/TextMatcher.java b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/misc/TextMatcher.java
new file mode 100644
index 0000000..ae19839
--- /dev/null
+++ b/bundles/org.eclipse.ui.workbench/Eclipse UI/org/eclipse/ui/internal/misc/TextMatcher.java
@@ -0,0 +1,168 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Thomas Wolf<thomas.wolf@paranor.ch> and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *******************************************************************************/
+package org.eclipse.ui.internal.misc;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.regex.Pattern;
+import org.eclipse.core.text.StringMatcher;
+
+/**
+ * Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern
+ * that may contain the wildcards '?' or '*' against a text. However, the
+ * matching is not only done on the full text, but also on individual words from
+ * the text, and if the pattern contains whitespace, the pattern is split into
+ * sub-patterns and those are matched, too.
+ * <p>
+ * The precise rules are:
+ * </p>
+ * <ul>
+ * <li>If the full pattern matches the full text, the match succeeds.</li>
+ * <li>If the full pattern matches a single word of the text, the match
+ * succeeds.</li>
+ * <li>If all sub-patterns match a prefix of the whole text or any prefix of any
+ * word, the match succeeds.</li>
+ * <li>Otherwise, the match fails.</li>
+ * </ul>
+ * <p>
+ * An empty pattern matches only the empty text.
+ * </p>
+ */
+public final class TextMatcher {
+
+	private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
+
+	private final StringMatcher full;
+
+	private final List<StringMatcher> parts;
+
+	/**
+	 * Creates a new {@link TextMatcher}.
+	 *
+	 * @param pattern         to match
+	 * @param ignoreCase      whether to do case-insensitive matching
+	 * @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
+	 *                        as wildcards
+	 * @throws IllegalArgumentException if {@code pattern == null}
+	 */
+	public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
+		full = new StringMatcher(pattern, ignoreCase, ignoreWildCards);
+		parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
+	}
+
+	private List<StringMatcher> splitPattern(String pattern,
+			boolean ignoreCase, boolean ignoreWildCards) {
+		String pat = pattern.trim();
+		if (pat.isEmpty()) {
+			return Collections.emptyList();
+		}
+		String[] subPatterns = pattern.split("\\s+"); //$NON-NLS-1$
+		if (subPatterns.length <= 1) {
+			return Collections.emptyList();
+		}
+		List<StringMatcher> matchers = new ArrayList<>();
+		for (String s : subPatterns) {
+			if (s == null || s.isEmpty()) {
+				continue;
+			}
+			StringMatcher m = new StringMatcher(s, ignoreCase, ignoreWildCards);
+			m.usePrefixMatch();
+			matchers.add(m);
+		}
+		return matchers;
+	}
+
+	/**
+	 * Determines whether the given {@code text} matches the pattern.
+	 *
+	 * @param text String to match; must not be {@code null}
+	 * @return {@code true} if the whole {@code text} matches the pattern;
+	 *         {@code false} otherwise
+	 * @throws IllegalArgumentException if {@code text == null}
+	 */
+	public boolean match(String text) {
+		if (text == null) {
+			throw new IllegalArgumentException();
+		}
+		return match(text, 0, text.length());
+	}
+
+	/**
+	 * Determines whether the given sub-string of {@code text} from {@code start}
+	 * (inclusive) to {@code end} (exclusive) matches the pattern.
+	 *
+	 * @param text  String to match in; must not be {@code null}
+	 * @param start start index (inclusive) within {@code text} of the sub-string to
+	 *              match
+	 * @param end   end index (exclusive) within {@code text} of the sub-string to
+	 *              match
+	 * @return {@code true} if the given slice of {@code text} matches the pattern;
+	 *         {@code false} otherwise
+	 * @throws IllegalArgumentException if {@code text == null}
+	 */
+	public boolean match(String text, int start, int end) {
+		if (text == null) {
+			throw new IllegalArgumentException();
+		}
+		if (start > end) {
+			return false;
+		}
+		int tlen = text.length();
+		start = Math.max(0, start);
+		end = Math.min(end, tlen);
+		if (full.match(text, start, end)) {
+			return true;
+		}
+		String[] words = getWords(text.substring(start, end));
+		if (match(full, words)) {
+			return true;
+		}
+		if (parts.isEmpty()) {
+			return false;
+		}
+		for (StringMatcher subMatcher : parts) {
+			if (!subMatcher.match(text, start, end) && !match(subMatcher, words)) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	private boolean match(StringMatcher matcher, String[] words) {
+		return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
+	}
+
+	/**
+	 * Splits a given text into words.
+	 *
+	 * @param text to split
+	 * @return the words of the text
+	 */
+	public static String[] getWords(String text) {
+		// Previous implementations (in the removed StringMatcher) used the ICU
+		// BreakIterator to split the text. That worked well, but in 2020 it was decided
+		// to drop the dependency to the ICU library due to its size. The JDK
+		// BreakIterator splits differently, causing e.g.
+		// https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
+		// appears to work well for programming language text, but may give sub-optimal
+		// results for natural languages. See also
+		// https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
+		return NON_WORD.split(text);
+	}
+
+	@Override
+	public String toString() {
+		return '[' + full.toString() + ',' + parts + ']';
+	}
+}
diff --git a/bundles/org.eclipse.ui.workbench/META-INF/MANIFEST.MF b/bundles/org.eclipse.ui.workbench/META-INF/MANIFEST.MF
index 526c4b3..9ab0855 100644
--- a/bundles/org.eclipse.ui.workbench/META-INF/MANIFEST.MF
+++ b/bundles/org.eclipse.ui.workbench/META-INF/MANIFEST.MF
@@ -94,7 +94,7 @@
  org.eclipse.ui.themes,
  org.eclipse.ui.views,
  org.eclipse.ui.wizards
-Require-Bundle: org.eclipse.core.runtime;bundle-version="[3.14.0,4.0.0)",
+Require-Bundle: org.eclipse.core.runtime;bundle-version="[3.19.0,4.0.0)",
  org.eclipse.help;bundle-version="[3.2.0,4.0.0)",
  org.eclipse.jface;bundle-version="[3.18.0,4.0.0)",
  org.eclipse.swt;bundle-version="[3.107.0,4.0.0)",
diff --git a/bundles/org.eclipse.ui/META-INF/MANIFEST.MF b/bundles/org.eclipse.ui/META-INF/MANIFEST.MF
index c0e12f6..74fd0b4 100644
--- a/bundles/org.eclipse.ui/META-INF/MANIFEST.MF
+++ b/bundles/org.eclipse.ui/META-INF/MANIFEST.MF
@@ -2,7 +2,7 @@
 Bundle-ManifestVersion: 2
 Bundle-Name: %Plugin.name
 Bundle-SymbolicName: org.eclipse.ui; singleton:=true
-Bundle-Version: 3.117.100.qualifier
+Bundle-Version: 3.118.0.qualifier
 Bundle-ClassPath: .
 Bundle-Activator: org.eclipse.ui.internal.UIPlugin
 Bundle-ActivationPolicy: lazy
@@ -12,7 +12,7 @@
 Require-Bundle: org.eclipse.core.runtime;bundle-version="[3.2.0,4.0.0)",
  org.eclipse.swt;bundle-version="[3.103.0,4.0.0)";visibility:=reexport,
  org.eclipse.jface;bundle-version="[3.19.0,4.0.0)";visibility:=reexport,
- org.eclipse.ui.workbench;bundle-version="[3.119.0,4.0.0)";visibility:=reexport,
+ org.eclipse.ui.workbench;bundle-version="[3.120.0,4.0.0)";visibility:=reexport,
  org.eclipse.core.expressions;bundle-version="[3.4.0,4.0.0)"
 Bundle-RequiredExecutionEnvironment: JavaSE-1.8
 Automatic-Module-Name: org.eclipse.ui
diff --git a/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/UiTestSuite.java b/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/UiTestSuite.java
index f4a073f..19374f1 100644
--- a/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/UiTestSuite.java
+++ b/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/UiTestSuite.java
@@ -36,6 +36,7 @@
 import org.eclipse.ui.tests.fieldassist.FieldAssistTestSuite;
 import org.eclipse.ui.tests.filteredtree.FilteredTreeTests;
 import org.eclipse.ui.tests.filteredtree.PatternFilterTest;
+import org.eclipse.ui.tests.filteredtree.TextMatcherTest;
 import org.eclipse.ui.tests.internal.InternalTestSuite;
 import org.eclipse.ui.tests.intro.IntroTestSuite;
 import org.eclipse.ui.tests.keys.KeysTestSuite;
@@ -87,6 +88,7 @@
 	ConcurrencyTestSuite.class,
 	FilteredTreeTests.class,
 	PatternFilterTest.class,
+	TextMatcherTest.class,
 	StatusHandlingTestSuite.class,
 	MenusTestSuite.class,
 	QuickAccessTestSuite.class,
diff --git a/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/filteredtree/TextMatcherTest.java b/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/filteredtree/TextMatcherTest.java
new file mode 100644
index 0000000..3e6ca63
--- /dev/null
+++ b/tests/org.eclipse.ui.tests/Eclipse UI Tests/org/eclipse/ui/tests/filteredtree/TextMatcherTest.java
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Thomas Wolf<thomas.wolf@paranor.ch> and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *******************************************************************************/
+package org.eclipse.ui.tests.filteredtree;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.eclipse.ui.internal.misc.TextMatcher;
+import org.junit.Test;
+
+/**
+ * Tests for {@link TextMatcher}.
+ */
+public class TextMatcherTest {
+
+	@Test
+	public void testEmpty() {
+		assertTrue(new TextMatcher("", false, false).match(""));
+		assertFalse(new TextMatcher("", false, false).match("foo"));
+		assertFalse(new TextMatcher("", false, false).match("foo bar baz"));
+		assertTrue(new TextMatcher("", false, true).match(""));
+		assertFalse(new TextMatcher("", false, true).match("foo"));
+		assertFalse(new TextMatcher("", false, true).match("foo bar baz"));
+	}
+
+	@Test
+	public void testSuffixes() {
+		assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_123"));
+		assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_baz"));
+	}
+
+	@Test
+	public void testChinese() {
+		assertTrue(new TextMatcher("喜欢", false, false).match("我 喜欢 吃 苹果。"));
+		// This test would work only if word-splitting used the ICU BreakIterator.
+		// "Words" are as shown above.
+		// assertTrue(new TextMatcher("喜欢", false, false).match("我喜欢吃苹果。"));
+	}
+
+	@Test
+	public void testSingleWords() {
+		assertTrue(new TextMatcher("huhn", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("h?hner", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("h*hner", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("hühner", false, false).match("hahn henne hühner küken huhn"));
+		// Full pattern must match word fully
+		assertFalse(new TextMatcher("h?hner", false, false).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("h*hner", false, false).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("hühner", false, false).match("hahn henne hühnerhof küken huhn"));
+
+		assertTrue(new TextMatcher("huhn", false, true).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("hühner", false, true).match("hahn henne hühner küken huhn"));
+		// Full pattern must match word fully
+		assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("hühner", false, true).match("hahn henne hühnerhof küken huhn"));
+	}
+
+	@Test
+	public void testMultipleWords() {
+		assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühnerhof küken huhn"));
+		assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühnerhof küken huhn"));
+	}
+
+	@Test
+	public void testCaseInsensitivity() {
+		assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("Huhn HÜHNER", true, true).match("hahn henne hühner küken huhn"));
+		assertTrue(new TextMatcher("Huhn HÜHNER", true, true).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("HüHnEr", true, false).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("HüHnEr", true, false).match("hahn henne hühnerhof küken huhn"));
+		assertTrue(new TextMatcher("HüHnEr", true, true).match("hahn henne hühner küken huhn"));
+		assertFalse(new TextMatcher("HüHnEr", true, true).match("hahn henne hühnerhof küken huhn"));
+	}
+}