Bug 565267 - [subword] improved boundaries for mixed caps and camel case
Fixed detection of the word boundary after an upper-case name prefix.
Examples:
* IImportWizard matched by "import": I[Import]Wizard
* HTMLTable matched by "table": HTML[Table]
The state-machine-based detection turned out not to be a good fit.
Instead, boundaries are detected by comparing the case of the characters
in every (previous, current, next) triple.
Change-Id: If87714632f1103b1fbb96171c1c29483d7417d94
Signed-off-by: Julian Honnen <julian.honnen@vector.com>
diff --git a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
index c890b46..aa2aa77 100644
--- a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
+++ b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
@@ -1609,6 +1609,11 @@
int[] regions = SearchPattern.getMatchingRegions("ini", name, SearchPattern.R_SUBWORD_MATCH);
assertEquals("Unexpected matching regions", null, printRegions(name, regions));
}
+public void testSubword_caps_boundaries3() {
+ String name = "CASE_INSENSITIVE_ORDER";
+ int[] regions = SearchPattern.getMatchingRegions("sensitive", name, SearchPattern.R_SUBWORD_MATCH);
+ assertEquals("Unexpected matching regions", null, printRegions(name, regions));
+}
public void testSubword_caps_backtracking() {
String name = "LIST_LISTENER";
int[] regions = SearchPattern.getMatchingRegions("listener", name, SearchPattern.R_SUBWORD_MATCH);
@@ -1619,4 +1624,24 @@
int[] regions = SearchPattern.getMatchingRegions("addlistener", name, SearchPattern.R_SUBWORD_MATCH);
assertEquals("Unexpected matching regions", "[add]_list_[listener]", printRegions(name, regions));
}
+public void testSubword_mixedCamelCase1() {
+ String name = "IImportWizard";
+ int[] regions = SearchPattern.getMatchingRegions("import", name, SearchPattern.R_SUBWORD_MATCH);
+ assertEquals("Unexpected matching regions", "I[Import]Wizard", printRegions(name, regions));
+}
+public void testSubword_mixedCamelCase2() {
+ String name = "HTMLTable";
+ int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH);
+ assertEquals("Unexpected matching regions", "HTML[Table]", printRegions(name, regions));
+}
+public void testSubword_mixedCamelCase3() {
+ String name = "CustomHTMLTable";
+ int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH);
+ assertEquals("Unexpected matching regions", "CustomHTML[Table]", printRegions(name, regions));
+}
+public void testSubword_mixedCamelCase4() {
+ String name = "ImportHTML";
+ int[] regions = SearchPattern.getMatchingRegions("html", name, SearchPattern.R_SUBWORD_MATCH);
+ assertEquals("Unexpected matching regions", "Import[HTML]", printRegions(name, regions));
+}
}
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
index 806091f..9886aef 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
@@ -7,7 +7,7 @@
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
- *
+ *
* Contributors:
* Julian Honnen - initial API and implementation
*******************************************************************************/
@@ -29,16 +29,42 @@
this.name = name.toCharArray();
this.wordBoundaries = new BitSet(name.length());
- BoundaryState state = BoundaryState.SEPARATOR;
for (int i = 0; i < this.name.length; i++) {
- char c = this.name[i];
- if (state.isWordBoundary(c)) {
+ if (isWordBoundary(caseAt(i - 1), caseAt(i), caseAt(i + 1))) {
this.wordBoundaries.set(i);
}
- state = state.next(c);
}
}
+ private Case caseAt(int index) {
+ if (index < 0 || index >= this.name.length)
+ return Case.SEPARATOR;
+
+ char c = this.name[index];
+ if (c == '_')
+ return Case.SEPARATOR;
+ if (ScannerHelper.isUpperCase(c))
+ return Case.UPPER;
+ return Case.LOWER;
+ }
+
+ private static boolean isWordBoundary(Case p, Case c, Case n) {
+ if (p == c && c == n)
+ return false; // a boundary needs some kind of gradient
+
+ if (p == Case.SEPARATOR)
+ return true; // boundary after every separator
+
+ // the remaining cases are boundaries for capitalization changes:
+ // lowerUpper, UPPERLower, lowerUPPER
+ //      ^           ^           ^
+ return (c == Case.UPPER) && (p == Case.LOWER || n == Case.LOWER);
+ }
+
+ private enum Case {
+ SEPARATOR, LOWER, UPPER
+ }
+
public int[] getMatchingRegions(String pattern) {
int segmentStart = 0;
int[] segments = EMPTY_REGIONS;
@@ -131,52 +157,4 @@
private boolean isWordBoundary(int iName) {
return this.wordBoundaries.get(iName);
}
-
- private enum BoundaryState {
- SEPARATOR() {
- @Override
- public BoundaryState next(char c) {
- if (c == '_')
- return SEPARATOR;
-
- return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD;
- }
- @Override
- public boolean isWordBoundary(char c) {
- return true;
- }
- },
- WORD() {
- @Override
- public BoundaryState next(char c) {
- if (c == '_')
- return SEPARATOR;
-
- return WORD;
- }
-
- @Override
- public boolean isWordBoundary(char c) {
- return ScannerHelper.isUpperCase(c);
- }
- },
- CAPS_WORD() {
- @Override
- public BoundaryState next(char c) {
- if (c == '_')
- return SEPARATOR;
-
- return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD;
- }
-
- @Override
- public boolean isWordBoundary(char c) {
- return next(c) == SEPARATOR;
- }
- };
-
- public abstract boolean isWordBoundary(char c);
-
- public abstract BoundaryState next(char c);
- }
}
\ No newline at end of file