Bug 565267 - [subword] improved boundaries for mixed caps and camel case

Fixed detection of word boundary after capital name prefix.
Examples:
* IImportWizard  matched by "import":  I[Import]Wizard
* HTMLTable      matched by "table":   HTML[Table]

The state-machine-based detection turned out not to be a good fit.
Instead, detect boundaries by comparing the case of the chars in every
(previous, current, next) tuple.

Change-Id: If87714632f1103b1fbb96171c1c29483d7417d94
Signed-off-by: Julian Honnen <julian.honnen@vector.com>
diff --git a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
index c890b46..aa2aa77 100644
--- a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
+++ b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java
@@ -1609,6 +1609,11 @@
 	int[] regions = SearchPattern.getMatchingRegions("ini", name, SearchPattern.R_SUBWORD_MATCH);
 	assertEquals("Unexpected matching regions", null, printRegions(name, regions));
 }
+public void testSubword_caps_boundaries3() { // all-caps name, pattern that only occurs inside a word
+	String name = "CASE_INSENSITIVE_ORDER";
+	int[] regions = SearchPattern.getMatchingRegions("sensitive", name, SearchPattern.R_SUBWORD_MATCH);
+	assertEquals("Unexpected matching regions", null, printRegions(name, regions)); // "sensitive" only occurs inside INSENSITIVE: no regions expected
+}
 public void testSubword_caps_backtracking() {
 	String name = "LIST_LISTENER";
 	int[] regions = SearchPattern.getMatchingRegions("listener", name, SearchPattern.R_SUBWORD_MATCH);
@@ -1619,4 +1624,24 @@
 	int[] regions = SearchPattern.getMatchingRegions("addlistener", name, SearchPattern.R_SUBWORD_MATCH);
 	assertEquals("Unexpected matching regions", "[add]_list_[listener]", printRegions(name, regions));
 }
+public void testSubword_mixedCamelCase1() { // camel-case word directly after a one-letter capital prefix
+	String name = "IImportWizard";
+	int[] regions = SearchPattern.getMatchingRegions("import", name, SearchPattern.R_SUBWORD_MATCH);
+	assertEquals("Unexpected matching regions", "I[Import]Wizard", printRegions(name, regions)); // the leading "I" stays outside the match
+}
+public void testSubword_mixedCamelCase2() { // camel-case word directly after an all-caps prefix
+	String name = "HTMLTable";
+	int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH);
+	assertEquals("Unexpected matching regions", "HTML[Table]", printRegions(name, regions)); // the last capital of the run starts the matched word
+}
+public void testSubword_mixedCamelCase3() { // all-caps run between two camel-case words
+	String name = "CustomHTMLTable";
+	int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH);
+	assertEquals("Unexpected matching regions", "CustomHTML[Table]", printRegions(name, regions)); // only the trailing word is matched
+}
+public void testSubword_mixedCamelCase4() { // all-caps word at the end of a camel-case name
+	String name = "ImportHTML";
+	int[] regions = SearchPattern.getMatchingRegions("html", name, SearchPattern.R_SUBWORD_MATCH);
+	assertEquals("Unexpected matching regions", "Import[HTML]", printRegions(name, regions)); // the whole caps run is matched as one word
+}
 }
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
index 806091f..9886aef 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java
@@ -7,7 +7,7 @@
  * https://www.eclipse.org/legal/epl-2.0/
  *
  * SPDX-License-Identifier: EPL-2.0
- * 
+ *
  * Contributors:
  *     Julian Honnen - initial API and implementation
  *******************************************************************************/
@@ -29,16 +29,42 @@
 		this.name = name.toCharArray();
 		this.wordBoundaries = new BitSet(name.length());
 
-		BoundaryState state = BoundaryState.SEPARATOR;
 		for (int i = 0; i < this.name.length; i++) {
-			char c = this.name[i];
-			if (state.isWordBoundary(c)) {
+			if (isWordBoundary(caseAt(i - 1), caseAt(i), caseAt(i + 1))) {
 				this.wordBoundaries.set(i);
 			}
-			state = state.next(c);
 		}
 	}
 
+	private Case caseAt(int index) { // classifies the char of this.name at index as SEPARATOR, UPPER or LOWER
+		if (index < 0 || index >= this.name.length)
+			return Case.SEPARATOR; // positions outside the name are treated like separators
+
+		char c = this.name[index];
+		if (c == '_')
+			return Case.SEPARATOR; // '_' is the only char classified as separator
+		if (ScannerHelper.isUpperCase(c))
+			return Case.UPPER;
+		return Case.LOWER; // everything that is neither '_' nor upper case
+	}
+
+	private static boolean isWordBoundary(Case p, Case c, Case n) { // p, c, n: case of the previous, current and next char
+		if (p == c && c == n)
+			return false; // a boundary needs some kind of gradient
+
+		if (p == Case.SEPARATOR)
+			return true; // boundary after every separator
+
+		// the remaining cases are boundaries for capitalization changes:
+		// lowerUpper, UPPERLower, lowerUPPER
+		//      ^           ^           ^
+		return (c == Case.UPPER) && (p == Case.LOWER || n == Case.LOWER); // upper-case char with a lower-case neighbor on either side
+	}
+
+	private enum Case { // per-char classification compared by isWordBoundary(Case, Case, Case)
+		SEPARATOR, LOWER, UPPER // SEPARATOR is what caseAt returns for '_' and for indices outside the name
+	}
+
 	public int[] getMatchingRegions(String pattern) {
 		int segmentStart = 0;
 		int[] segments = EMPTY_REGIONS;
@@ -131,52 +157,4 @@
 	private boolean isWordBoundary(int iName) {
 		return this.wordBoundaries.get(iName);
 	}
-
-	private enum BoundaryState {
-		SEPARATOR() {
-			@Override
-			public BoundaryState next(char c) {
-				if (c == '_')
-					return SEPARATOR;
-
-				return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD;
-			}
-			@Override
-			public boolean isWordBoundary(char c) {
-				return true;
-			}
-		},
-		WORD() {
-			@Override
-			public BoundaryState next(char c) {
-				if (c == '_')
-					return SEPARATOR;
-
-				return WORD;
-			}
-
-			@Override
-			public boolean isWordBoundary(char c) {
-				return ScannerHelper.isUpperCase(c);
-			}
-		},
-		CAPS_WORD() {
-			@Override
-			public BoundaryState next(char c) {
-				if (c == '_')
-					return SEPARATOR;
-
-				return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD;
-			}
-
-			@Override
-			public boolean isWordBoundary(char c) {
-				return next(c) == SEPARATOR;
-			}
-		};
-
-		public abstract boolean isWordBoundary(char c);
-
-		public abstract BoundaryState next(char c);
-	}
 }
\ No newline at end of file