blob: 425ef995b0f25a04e3505d466db6e855402359f1 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2000, 2014 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* Red Hat, Inc. - Bug #426939 - [validator] HTML5 attribute validator
* marks ng-app AngularJS attributes as undefined
*******************************************************************************/
package org.eclipse.wst.html.core.internal.validate;
import java.util.ArrayList;
/**
 * A string pattern matcher, supporting "*" and "?" wild cards.
 * <p>
 * "*" matches zero or more characters and "?" matches exactly one
 * character. Matching is always case-insensitive.
 * </p>
 *
 * @since 3.2
 */
public class StringMatcher {
    /**
     * Internal stand-in stored in a parsed segment for an unescaped '?'.
     * A NUL character written literally in a pattern is therefore also
     * treated as a single-character wild card.
     */
    private static final char SINGLE_WILD_CARD = '\u0000';

    /**
     * Boundary value beyond which we don't need to search in the text:
     * the sum of the lengths of all segments, i.e. the minimum length a
     * text must have in order to match.
     */
    private int bound = 0;

    /** True when the pattern starts with an (unescaped) '*'. */
    private boolean hasLeadingStar;

    /** True when the pattern ends with an unescaped '*'. */
    private boolean hasTrailingStar;

    private final String pattern;
    private final int patternLength;

    /**
     * The pattern split into segments separated by *. Escape sequences are
     * already resolved and '?' is replaced by {@link #SINGLE_WILD_CARD}.
     */
    private String[] segments;

    /**
     * StringMatcher constructor takes in a String object that is a simple
     * pattern which may contain '*' for 0 and many characters and
     * '?' for exactly one character.
     *
     * Literal '*' and '?' characters must be escaped in the pattern
     * e.g., "\*" means literal "*", etc. The escape character itself is
     * escaped the same way: "\\" means a literal "\".
     *
     * A backslash in front of any other character is not an escape
     * sequence; both the backslash and the following character are kept
     * literally, e.g., "\a" matches the two characters "\a". A single
     * backslash at the very end of the pattern is a literal backslash.
     *
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     *
     * @param pattern the pattern to match text against
     * @throws IllegalArgumentException if <code>pattern</code> is <code>null</code>
     */
    public StringMatcher(String pattern) {
        if (pattern == null)
            throw new IllegalArgumentException();
        this.pattern = pattern;
        patternLength = pattern.length();
        parseWildCards();
    }

    /**
     * Searches <code>text</code> for the first case-insensitive occurrence
     * of the segment <code>p</code> that lies entirely within
     * <code>[start, end)</code>.
     *
     * @param text the text to search in
     * @param start the starting index in the text for search, inclusive
     * @param end the stopping point of search, exclusive
     * @param p a parsed segment that may contain single-character wild cards
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    private int findPosition(String text, int start, int end, String p) {
        boolean hasWildCard = p.indexOf(SINGLE_WILD_CARD) >= 0;
        int plen = p.length();
        // max is the last start index at which p still fits before end
        for (int i = start, max = end - plen; i <= max; ++i) {
            if (hasWildCard) {
                if (regExpRegionMatches(text, i, p, 0, plen))
                    return i;
            } else {
                // no wild card in this segment: plain case-insensitive compare
                if (text.regionMatches(true, i, p, 0, plen))
                    return i;
            }
        }
        return -1;
    }

    /**
     * Determines whether the whole of <code>text</code> matches the pattern
     * this matcher was created with. The comparison ignores case.
     *
     * @param text the text to match; <code>null</code> never matches
     * @return true if the text matches the pattern
     */
    public boolean match(String text) {
        if (text == null)
            return false;
        final int end = text.length();
        final int segmentCount = segments.length;
        if (segmentCount == 0 && (hasLeadingStar || hasTrailingStar)) // pattern contains only '*'(s)
            return true;
        if (end == 0)
            return patternLength == 0;
        if (patternLength == 0)
            return false;
        // the text is shorter than all literal segments put together
        if ((end - bound) < 0)
            return false;

        int currentTextPosition = 0;
        int segmentIndex = 0;
        String current = segments[segmentIndex];

        /* process first segment: without a leading '*' it is anchored at index 0 */
        if (!hasLeadingStar) {
            int currentLength = current.length();
            if (!regExpRegionMatches(text, 0, current, 0, currentLength))
                return false;
            segmentIndex++;
            currentTextPosition = currentTextPosition + currentLength;
        }
        if ((segmentCount == 1) && (!hasLeadingStar) && (!hasTrailingStar)) {
            // only one segment to match, no wild cards specified
            return currentTextPosition == end;
        }

        /* process remaining segments: each must occur after the previous one */
        while (segmentIndex < segmentCount) {
            current = segments[segmentIndex];
            int currentMatch = findPosition(text, currentTextPosition, end, current);
            if (currentMatch < 0)
                return false;
            currentTextPosition = currentMatch + current.length();
            segmentIndex++;
        }

        /*
         * process final segment: without a trailing '*' the last segment
         * must also sit at the very end of the text
         */
        if (!hasTrailingStar && currentTextPosition != end) {
            int currentLength = current.length();
            return regExpRegionMatches(text, end - currentLength, current, 0, currentLength);
        }
        return segmentIndex == segmentCount;
    }

    /**
     * Parses the pattern into segments separated by wildcard '*' characters,
     * resolving escape sequences, and records whether the pattern starts
     * and/or ends with an unescaped '*'.
     */
    private void parseWildCards() {
        // the first character can never be escaped, so a leading '*' is always a wild card
        if (pattern.startsWith("*"))//$NON-NLS-1$
            hasLeadingStar = true;

        ArrayList temp = new ArrayList();
        int pos = 0;
        StringBuffer buf = new StringBuffer();
        while (pos < patternLength) {
            char c = pattern.charAt(pos++);
            switch (c) {
                case '\\' :
                    if (pos >= patternLength) {
                        /* lone backslash at the end of the pattern */
                        buf.append(c);
                    } else {
                        char next = pattern.charAt(pos++);
                        /* if it's an escape sequence */
                        if (next == '*' || next == '?' || next == '\\') {
                            buf.append(next);
                        } else {
                            /* not an escape sequence, just insert literally */
                            buf.append(c);
                            buf.append(next);
                        }
                    }
                    break;
                case '*' :
                    /*
                     * This '*' is known to be unescaped here (an escaped one
                     * is consumed by the '\\' case above), so if it is the
                     * last character the pattern really ends with a wild
                     * card. Looking only at the preceding character would
                     * misread patterns ending in an escaped backslash
                     * followed by '*'.
                     */
                    if (pos == patternLength) {
                        hasTrailingStar = true;
                    }
                    if (buf.length() > 0) {
                        /* new segment */
                        temp.add(buf.toString());
                        bound += buf.length();
                        buf.setLength(0);
                    }
                    break;
                case '?' :
                    /* append special character representing single match wildcard */
                    buf.append(SINGLE_WILD_CARD);
                    break;
                default :
                    buf.append(c);
            }
        }

        /* add last buffer to segment list */
        if (buf.length() > 0) {
            temp.add(buf.toString());
            bound += buf.length();
        }
        segments = (String[]) temp.toArray(new String[temp.size()]);
    }

    /**
     * Compares <code>plen</code> characters of <code>text</code> starting at
     * <code>tStart</code> with the same number of characters of
     * <code>p</code> starting at <code>pStart</code>, ignoring case and
     * letting a single-character wild card in <code>p</code> match any
     * character. The caller must make sure both regions are within bounds.
     *
     * @return true if the two regions match
     * @param text a String to match
     * @param tStart the starting index of match, inclusive
     * @param p a parsed segment that may contain single-character wild cards
     * @param pStart The start position in the pattern
     * @param plen The length of the pattern
     */
    private boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);
            // process wild cards, skipping single wild cards
            if (pchar == SINGLE_WILD_CARD)
                continue;
            if (pchar == tchar)
                continue;
            if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar))
                continue;
            // comparing after converting to upper case doesn't handle all cases;
            // also compare after converting to lower case
            if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar))
                continue;
            return false;
        }
        return true;
    }
}