Bug 575893 - [performance] improve file search: non-regexp
Use Pattern.quote for the literal (non-wildcard) parts of the pattern in
non-regexp search instead of escaping each character individually.
Change-Id: I061506448cdde5ab17ad1b9c551111472acf47eb
Signed-off-by: Joerg Kubitz <jkubitz-eclipse@gmx.de>
Reviewed-on: https://git.eclipse.org/r/c/platform/eclipse.platform.text/+/185193
Tested-by: Platform Bot <platform-bot@eclipse.org>
Reviewed-by: Lars Vogel <Lars.Vogel@vogella.com>
diff --git a/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java b/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java
index 51e3c09..8439dc2 100644
--- a/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java
+++ b/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java
@@ -362,6 +362,45 @@
}
@Test
+ public void testWildcardQuotes() throws Exception {
+ assertWildcardReplace("H", "Hallo", "-allo");
+ assertWildcardReplace("a", "Hallo", "H-llo");
+ assertWildcardReplace("al", "Hallo", "H-lo");
+ assertWildcardReplace("a*", "Hallo", "H-");
+ assertWildcardReplace("a?", "Hallo", "H-lo");
+ assertWildcardReplace("?", "Hallo", "-----");
+ assertWildcardReplace("{", "Ha({o", "Ha(-o");
+ assertWildcardReplace("(", "Ha({o", "Ha-{o");
+ assertWildcardReplace("\\", "Ha\\\\o", "Ha--o");
+ assertWildcardReplace("\\\\", "Ha\\\\o", "Ha--o");
+ assertWildcardReplace("\\*", "Hall*", "Hall-");
+ assertWildcardReplace("\\?", "Ha??o?", "Ha--o-");
+ assertWildcardReplace("Du?und?ich", "Du und ich nicht", "- nicht");
+ assertWildcardReplace("Du*ich", "Du und ich nicht", "-t");
+ assertWildcardReplace("und*ich", "Du und ich nicht", "Du -t");
+ assertWildcardReplace("*ich", "Du und ich nicht", "-t");
+
+ assertWildcardReplace("*", "Hallo", "--");
+ // XXX One would expect "-", but ".*" matches twice: chars 0-5 and the empty string at 5-5.
+ // The regex would have to be ".+" to avoid matching the empty string at the end.
+ }
+
+ private void assertWildcardReplace(String pattern, String in, String expected) {
+ String regex= asRegEx(true, pattern);
+ try {
+ String replaced= in.replaceAll(regex, "-");
+ assertEquals(expected, replaced);
+ } catch (Exception e) {
+ throw new RuntimeException("Error with pattern:" + pattern + " regex=" + regex, e);
+ }
+ }
+
+ String asRegEx(boolean wildcards, String pattern) {
+ StringBuilder b= new StringBuilder();
+ org.eclipse.search.internal.core.text.PatternConstructor.appendAsRegEx(wildcards, pattern, b);
+ return b.toString();
+ }
+ @Test
public void testDerivedFilesParallel() throws Exception {
testDerivedFiles(new ParallelTestResultCollector());
}
diff --git a/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java b/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java
index b785782..287fb18 100644
--- a/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java
+++ b/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java
@@ -66,7 +66,7 @@
if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) {
buffer.append("\\b"); //$NON-NLS-1$
}
- pattern= buffer.toString();
+ pattern= buffer.toString();
}
int regexOptions= Pattern.MULTILINE;
@@ -183,6 +183,12 @@
public static StringBuilder appendAsRegEx(boolean isStringMatcher, String pattern, StringBuilder buffer) {
+ if (!isStringMatcher) {
+ buffer.append(Pattern.quote(pattern));
+ return buffer;
+ }
+ // isStringMatcher: '*' and '?' are wildcards and '\' is the escape character
+ StringBuilder quoted = new StringBuilder();
boolean isEscaped= false;
for (int i = 0; i < pattern.length(); i++) {
char c = pattern.charAt(i);
@@ -190,59 +196,46 @@
// the backslash
case '\\':
// the backslash is escape char in string matcher
- if (isStringMatcher && !isEscaped) {
+ if (!isEscaped) {
isEscaped= true;
}
else {
- buffer.append("\\\\"); //$NON-NLS-1$
+ quoted.append(c);
isEscaped= false;
}
break;
- // characters that need to be escaped in the regex.
- case '(':
- case ')':
- case '{':
- case '}':
- case '.':
- case '[':
- case ']':
- case '$':
- case '^':
- case '+':
- case '|':
- if (isEscaped) {
- buffer.append("\\\\"); //$NON-NLS-1$
- isEscaped= false;
- }
- buffer.append('\\');
- buffer.append(c);
- break;
case '?':
- if (isStringMatcher && !isEscaped) {
+ if (!isEscaped) {
+ if (quoted.length() > 0) { // flush quote
+ buffer.append(Pattern.quote(quoted.toString()));
+ quoted = new StringBuilder();
+ }
buffer.append('.');
}
else {
- buffer.append('\\');
- buffer.append(c);
+ quoted.append(c);
isEscaped= false;
}
break;
case '*':
- if (isStringMatcher && !isEscaped) {
+ if (!isEscaped) {
+ if (quoted.length() > 0) { // flush quote
+ buffer.append(Pattern.quote(quoted.toString()));
+ quoted = new StringBuilder();
+ }
buffer.append(".*"); //$NON-NLS-1$
}
else {
- buffer.append('\\');
- buffer.append(c);
+ quoted.append(c);
isEscaped= false;
}
break;
default:
if (isEscaped) {
- buffer.append("\\\\"); //$NON-NLS-1$
+ quoted.append("\\"); //$NON-NLS-1$
isEscaped= false;
}
- buffer.append(c);
+ quoted.append(c);
break;
}
}
@@ -250,6 +243,10 @@
buffer.append("\\\\"); //$NON-NLS-1$
isEscaped= false;
}
+ if (quoted.length() > 0) { // flush quote
+ buffer.append(Pattern.quote(quoted.toString()));
+ quoted = new StringBuilder();
+ }
return buffer;
}