| /******************************************************************************* |
| * Copyright (c) 2000, 2020 IBM Corporation and others. |
| * |
| * This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License 2.0 |
| * which accompanies this distribution, and is available at |
| * https://www.eclipse.org/legal/epl-2.0/ |
| * |
| * SPDX-License-Identifier: EPL-2.0 |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Lucas Bullen (Red Hat Inc.) - [Bug 203792] filter should support multiple keywords |
| * Mickael Istria (Red Hat Inc.) - [534277] erroneous filtering with multiple words |
| *******************************************************************************/ |
| package org.eclipse.ui.internal.misc; |
| |
| import java.util.ArrayList; |
| import java.util.regex.Pattern; |
| |
| /** |
| * A string pattern matcher, supporting "*" and "?" wildcards. |
| */ |
| public class StringMatcher { |
| protected String fPattern; |
| |
| protected int fLength; // pattern length |
| |
| protected boolean fIgnoreWildCards; |
| |
| protected boolean fIgnoreCase; |
| |
| protected String[] patternWords; |
| |
| protected Word wholePatternWord; |
| protected Word[] splittedPatternWords; |
| |
| protected static final char fSingleWildCard = '\u0000'; |
| private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$ |
| |
| class Word { |
| private boolean hasTrailingStar = false; |
| private boolean hasLeadingStar = false; |
| private int bound = 0; |
| private String[] fragments = null; |
| private final String pattern; |
| |
| Word(String pattern) { |
| this.pattern = pattern; |
| } |
| |
| public Word(String pattern, int fLength, String[] wordsSplitted) { |
| this(pattern); |
| this.bound = fLength; |
| this.fragments = wordsSplitted; |
| } |
| |
| private void parseWildcards() { |
| if (this.pattern.startsWith("*")) { //$NON-NLS-1$ |
| this.hasLeadingStar = true; |
| } |
| if (this.pattern.endsWith("*")) {//$NON-NLS-1$ |
| /* make sure it's not an escaped wildcard */ |
| if (this.pattern.length() > 1 && this.pattern.charAt(this.pattern.length() - 2) != '\\') { |
| this.hasTrailingStar = true; |
| } |
| } |
| |
| ArrayList<String> temp = new ArrayList<>(); |
| |
| int pos = 0; |
| StringBuilder buf = new StringBuilder(); |
| while (pos < this.pattern.length()) { |
| char c = this.pattern.charAt(pos++); |
| switch (c) { |
| case '\\': |
| if (pos >= this.pattern.length()) { |
| buf.append(c); |
| } else { |
| char next = this.pattern.charAt(pos++); |
| /* if it's an escape sequence */ |
| if (next == '*' || next == '?' || next == '\\') { |
| buf.append(next); |
| } else { |
| /* not an escape sequence, just insert literally */ |
| buf.append(c); |
| buf.append(next); |
| } |
| } |
| break; |
| case '*': |
| if (buf.length() > 0) { |
| /* new segment */ |
| temp.add(buf.toString()); |
| this.bound += buf.length(); |
| buf.setLength(0); |
| } |
| break; |
| case '?': |
| /* append special character representing single match wildcard */ |
| buf.append(fSingleWildCard); |
| break; |
| default: |
| buf.append(c); |
| } |
| } |
| |
| /* add last buffer to segment list */ |
| if (buf.length() > 0) { |
| temp.add(buf.toString()); |
| this.bound += buf.length(); |
| } |
| this.fragments = temp.toArray(new String[temp.size()]); |
| } |
| |
| boolean match(String text, int start, int end) { |
| boolean found = true; |
| if (fIgnoreWildCards) { |
| if ((end - start == this.pattern.length()) |
| && this.pattern.regionMatches(fIgnoreCase, 0, text, start, this.pattern.length())) |
| return true; |
| return false; |
| } |
| String[] segments = null; |
| segments = this.fragments; |
| int segCount = segments.length; |
| if (segCount == 0 && (this.hasLeadingStar || this.hasTrailingStar)) { |
| return true; |
| } |
| if (start == end) { |
| if (this.pattern.length() == 0) |
| return true; |
| return false; |
| } |
| if (this.pattern.length() == 0) { |
| if (start == end) |
| return true; |
| return false; |
| } |
| |
| int tCurPos = start; |
| int bound = end - this.bound; |
| if (bound < 0) { |
| return false; |
| } |
| int i = 0; |
| String current = segments[i]; |
| int segLength = current.length(); |
| |
| /* process first segment */ |
| if (!hasLeadingStar) { |
| if (!regExpRegionMatches(text, start, current, 0, segLength)) { |
| return false; |
| } |
| ++i; |
| tCurPos = tCurPos + segLength; |
| } |
| if ((segments.length == 1) && (!hasLeadingStar) && (!hasTrailingStar)) { |
| // only one segment to match, no wildcards specified |
| if (tCurPos == end) |
| return true; |
| return false; |
| } |
| /* process middle segments */ |
| while (i < segCount && found) { |
| current = segments[i]; |
| int currentMatch; |
| int k = current.indexOf(fSingleWildCard); |
| if (k < 0) { |
| currentMatch = textPosIn(text, tCurPos, end, current); |
| if (currentMatch < 0) { |
| found = false; |
| } |
| } else { |
| currentMatch = regExpPosIn(text, tCurPos, end, current); |
| if (currentMatch < 0) { |
| found = false; |
| } |
| } |
| if (!found) |
| return false; |
| tCurPos = currentMatch + current.length(); |
| i++; |
| } |
| |
| /* process final segment */ |
| if (!hasTrailingStar && tCurPos != end) { |
| int clen = current.length(); |
| if (regExpRegionMatches(text, end - clen, current, 0, clen)) |
| return true; |
| return false; |
| } |
| if (i == segCount) |
| return true; |
| return false; |
| } |
| |
| /** |
| * @param text |
| * @param start |
| * @param end |
| * @return whether the current pattern word matches at least one word in the |
| * given text |
| */ |
| public boolean matchTextWord(String text, int start, int end) { |
| String[] textWords = getWords(text.substring(start, end)); |
| if (textWords.length == 0) { |
| return pattern.isEmpty(); |
| } |
| for (String subword : textWords) { |
| if (match(subword, 0, subword.length())) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| } |
| |
| /** |
| * StringMatcher constructor takes in a String object that is a simple pattern |
| * which may contain '*' for 0 and many characters and '?' for exactly one |
| * character. |
| * |
| * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*" |
| * means literal "*", etc. |
| * |
| * Escaping any other character (including the escape character itself), just |
| * results in that character in the pattern. e.g., "\a" means "a" and "\\" means |
| * "\" |
| * |
| * If invoking the StringMatcher with string literals in Java, don't forget |
| * escape characters are represented by "\\". |
| * |
| * @param pattern the pattern to match text against |
| * @param ignoreCase if true, case is ignored |
| * @param ignoreWildCards if true, wild cards and their escape sequences are |
| * ignored (everything is taken literally). |
| */ |
| public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) { |
| if (pattern == null) { |
| throw new IllegalArgumentException(); |
| } |
| fIgnoreCase = ignoreCase; |
| fIgnoreWildCards = ignoreWildCards; |
| fPattern = pattern; |
| fLength = pattern.length(); |
| |
| parsePatternIntoWords(); |
| |
| if (fIgnoreWildCards) { |
| parseNoWildCards(); |
| } else { |
| if (wholePatternWord != null) { |
| wholePatternWord.parseWildcards(); |
| } |
| if (splittedPatternWords != null && splittedPatternWords.length > 1) { |
| for (Word word : splittedPatternWords) { |
| word.parseWildcards(); |
| } |
| } |
| } |
| } |
| |
| /** |
| * match the given <code>text</code> with the pattern |
| * |
| * @return true if matched otherwise false |
| * @param text a String object |
| */ |
| public boolean match(String text) { |
| if (text == null) { |
| return false; |
| } |
| return match(text, 0, text.length()); |
| } |
| |
| /** |
| * Given the starting (inclusive) and the ending (exclusive) positions in the |
| * <code>text</code>, determine if the given substring matches with aPattern |
| * |
| * @return true if the specified portion of the text matches the pattern |
| * @param text a String object that contains the substring to match |
| * @param start marks the starting position (inclusive) of the substring |
| * @param end marks the ending index (exclusive) of the substring |
| */ |
| public boolean match(String text, int start, int end) { |
| if (null == text) { |
| throw new IllegalArgumentException(); |
| } |
| if (start > end) { |
| return false; |
| } |
| int tlen = text.length(); |
| start = Math.max(0, start); |
| end = Math.min(end, tlen); |
| |
| if (wholePatternWord != null |
| && (wholePatternWord.match(text, start, end) || wholePatternWord.matchTextWord(text, start, end))) { |
| return true; |
| } |
| if (splittedPatternWords != null && splittedPatternWords.length > 0) { |
| for (Word word : splittedPatternWords) { |
| if (!word.match(text, start, end) && !word.matchTextWord(text, start, end)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * This method parses the given pattern into words separated by spaces |
| * characters. Since wildcards are not being used in this case, the pattern |
| * consists of a single segment. |
| */ |
| private void parsePatternIntoWords() { |
| String trimedPattern = fPattern.trim(); |
| if (!trimedPattern.isEmpty()) { |
| this.wholePatternWord = new Word(trimedPattern); |
| patternWords = trimedPattern.split("\\s+"); //$NON-NLS-1$ |
| if (patternWords.length > 1) { |
| this.splittedPatternWords = new Word[patternWords.length]; |
| for (int i = 0; i < patternWords.length; i++) { |
| String patternWord = patternWords[i]; |
| if (!patternWord.endsWith("*")) { //$NON-NLS-1$ |
| patternWord += '*'; |
| } |
| this.splittedPatternWords[i] = new Word(patternWord); |
| // words may be found anywhere in the line |
| } |
| } |
| } |
| } |
| |
| /** |
| * This method parses the given pattern into segments seperated by wildcard '*' |
| * characters. Since wildcards are not being used in this case, the pattern |
| * consists of a single segment. |
| */ |
| private void parseNoWildCards() { |
| this.wholePatternWord = new Word(fPattern, fLength, patternWords); |
| this.wholePatternWord.bound = fLength; |
| this.wholePatternWord.fragments = patternWords; |
| } |
| |
| /** |
| * @param text a string which contains no wildcard |
| * @param start the starting index in the text for search, inclusive |
| * @param end the stopping point of search, exclusive |
| * @return the starting index in the text of the pattern , or -1 if not found |
| */ |
| protected int posIn(String text, int start, int end) {// no wild card in pattern |
| int max = end - fLength; |
| |
| if (!fIgnoreCase) { |
| int i = text.indexOf(fPattern, start); |
| if (i == -1 || i > max) { |
| return -1; |
| } |
| return i; |
| } |
| |
| for (int i = start; i <= max; ++i) { |
| if (text.regionMatches(true, i, fPattern, 0, fLength)) { |
| return i; |
| } |
| } |
| |
| return -1; |
| } |
| |
| /** |
| * @param text a simple regular expression that may only contain '?'(s) |
| * @param start the starting index in the text for search, inclusive |
| * @param end the stopping point of search, exclusive |
| * @param p a simple regular expression that may contains '?' |
| * @return the starting index in the text of the pattern , or -1 if not found |
| */ |
| protected int regExpPosIn(String text, int start, int end, String p) { |
| int plen = p.length(); |
| |
| int max = end - plen; |
| for (int i = start; i <= max; ++i) { |
| if (regExpRegionMatches(text, i, p, 0, plen)) { |
| return i; |
| } |
| } |
| return -1; |
| } |
| |
| /** |
| * |
| * @return boolean |
| * @param text a String to match |
| * @param start int that indicates the starting index of match, inclusive |
| * @param end int that indicates the ending index of match, exclusive |
| * @param p String, String, a simple regular expression that may |
| * contain '?' |
| * @param ignoreCase boolean indicating whether <code>p</code> is case sensitive |
| */ |
| protected boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) { |
| while (plen-- > 0) { |
| char tchar = text.charAt(tStart++); |
| char pchar = p.charAt(pStart++); |
| |
| /* process wild cards */ |
| if (!fIgnoreWildCards) { |
| /* skip single wild cards */ |
| if (pchar == fSingleWildCard) { |
| continue; |
| } |
| } |
| if (pchar == tchar) { |
| continue; |
| } |
| if (fIgnoreCase) { |
| if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) { |
| continue; |
| } |
| // comparing after converting to upper case doesn't handle all cases; |
| // also compare after converting to lower case |
| if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) { |
| continue; |
| } |
| } |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * @param text the string to match |
| * @param start the starting index in the text for search, inclusive |
| * @param end the stopping point of search, exclusive |
| * @param p a pattern string that has no wildcard |
| * @return the starting index in the text of the pattern , or -1 if not found |
| */ |
| protected int textPosIn(String text, int start, int end, String p) { |
| |
| int plen = p.length(); |
| int max = end - plen; |
| |
| if (!fIgnoreCase) { |
| int i = text.indexOf(p, start); |
| if (i == -1 || i > max) { |
| return -1; |
| } |
| return i; |
| } |
| |
| for (int i = start; i <= max; ++i) { |
| if (text.regionMatches(true, i, p, 0, plen)) { |
| return i; |
| } |
| } |
| |
| return -1; |
| } |
| |
| /** |
| * Take the given filter text and break it down into words using a |
| * BreakIterator. |
| * |
| * @param text |
| * @return an array of words |
| */ |
| public static String[] getWords(String text) { |
| |
| return NON_WORD.split(text, 0); |
| } |
| |
| } |