| /******************************************************************************* |
| * Copyright (c) 2000, 2009 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.jdt.internal.core.search; |
| |
| import org.eclipse.jdt.core.compiler.CharOperation; |
| import org.eclipse.jdt.internal.compiler.parser.ScannerHelper; |
| |
| /** |
| * This class is a collection of helper methods to manipulate strings during search. |
| */ |
| public final class StringOperation { |
| |
| private final static int[] EMPTY_REGIONS = new int[0]; |
| |
| /** |
| * Answers all the regions in a given name matching a given camel case pattern. |
| * </p><p> |
| * Each of these regions is made of its starting index and its length in the given |
| * name. They are all concatenated in a single array of <code>int</code> |
| * which therefore always has an even length. |
| * </p><p> |
| * Note that each region is disjointed from the following one.<br> |
| * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>, |
| * then <code>start1+length1</code> will always be smaller than |
| * <code>start2</code>. |
| * </p><p> |
| * <pre> |
| * Examples: |
| * <ol><li> pattern = "NPE" |
| * name = NullPointerException / NoPermissionException |
| * result: { 0, 1, 4, 1, 11, 1 } / { 0, 1, 2, 1, 12, 1 } </li> |
| * <li> pattern = "NuPoEx" |
| * name = NullPointerException |
| * result: { 0, 2, 4, 2, 11, 2 }</li> |
| * <li> pattern = "IPL3" |
| * name = "IPerspectiveListener3" |
| * result: { 0, 2, 12, 1, 20, 1 }</li> |
| * <li> pattern = "HashME" |
| * name = "HashMapEntry" |
| * result: { 0, 5, 7, 1 }</li> |
| * </ol></pre> |
| *</p> |
| * @see CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean) |
| * for more details on the camel case behavior |
| * @see CharOperation#match(char[], char[], boolean) for more details on the |
| * pattern match behavior |
| * |
| * @param pattern the given pattern |
| * @param patternStart the start index of the pattern, inclusive |
| * @param patternEnd the end index of the pattern, exclusive |
| * @param name the given name |
| * @param nameStart the start index of the name, inclusive |
| * @param nameEnd the end index of the name, exclusive |
| * @param samePartCount flag telling whether the pattern and the name should |
| * have the same count of parts or not.<br> |
| * For example: |
| * <ul> |
| * <li>'HM' type string pattern will match 'HashMap' and 'HtmlMapper' types, |
| * but not 'HashMapEntry'</li> |
| * <li>'HMap' type string pattern will still match previous 'HashMap' and |
| * 'HtmlMapper' types, but not 'HighMagnitude'</li> |
| * </ul> |
| * @return an array of <code>int</code> having two slots per returned |
| * regions (first one is the starting index of the region and the second |
| * one the length of the region).<br> |
| * Note that it may be <code>null</code> if the given name does not match |
| * the pattern |
| * @since 3.5 |
| */ |
| public static final int[] getCamelCaseMatchingRegions(String pattern, int patternStart, int patternEnd, String name, int nameStart, int nameEnd, boolean samePartCount) { |
| |
| /* !!!!!!!!!! WARNING !!!!!!!!!! |
| * The algorithm used in this method has been fully inspired from |
| * CharOperation#camelCaseMatch(char[], int, int, char[], int, int, boolean). |
| * |
| * So, if any change needs to be applied in the algorithm, do NOT forget |
| * to backport it in the CharOperation method! |
| */ |
| |
| if (name == null) |
| return null; // null name cannot match |
| if (pattern == null) { |
| // null pattern cannot match any region |
| // see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816 |
| return EMPTY_REGIONS; |
| } |
| if (patternEnd < 0) patternEnd = pattern.length(); |
| if (nameEnd < 0) nameEnd = name.length(); |
| |
| if (patternEnd <= patternStart) { |
| return nameEnd <= nameStart |
| ? new int[] { patternStart, patternEnd-patternStart } |
| : null; |
| } |
| if (nameEnd <= nameStart) return null; |
| // check first pattern char |
| if (name.charAt(nameStart) != pattern.charAt(patternStart)) { |
| // first char must strictly match (upper/lower) |
| return null; |
| } |
| |
| char patternChar, nameChar; |
| int iPattern = patternStart; |
| int iName = nameStart; |
| |
| // init segments |
| int parts = 1; |
| for (int i=patternStart+1; i<patternEnd; i++) { |
| final char ch = pattern.charAt(i); |
| if (ch < ScannerHelper.MAX_OBVIOUS) { |
| if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[ch] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) != 0) { |
| parts++; |
| } |
| } else if (Character.isJavaIdentifierPart(ch) && (Character.isUpperCase(ch) || Character.isDigit(ch))) { |
| parts++; |
| } |
| } |
| int[] segments = null; |
| int count = 0; // count |
| |
| // Main loop is on pattern characters |
| int segmentStart = iName; |
| while (true) { |
| iPattern++; |
| iName++; |
| |
| if (iPattern == patternEnd) { // we have exhausted pattern... |
| // it's a match if the name can have additional parts (i.e. uppercase characters) or is also exhausted |
| if (!samePartCount || iName == nameEnd) { |
| if (segments == null) { |
| segments = new int[2]; |
| } |
| segments[count++] = segmentStart; |
| segments[count++] = iName - segmentStart; |
| if (count < segments.length) { |
| System.arraycopy(segments, 0, segments = new int[count], 0, count); |
| } |
| return segments; |
| } |
| |
| // otherwise it's a match only if the name has no more uppercase characters |
| int segmentEnd = iName; |
| while (true) { |
| if (iName == nameEnd) { |
| // we have exhausted the name, so it's a match |
| if (segments == null) { |
| segments = new int[2]; |
| } |
| segments[count++] = segmentStart; |
| segments[count++] = segmentEnd - segmentStart; |
| if (count < segments.length) { |
| System.arraycopy(segments, 0, segments = new int[count], 0, count); |
| } |
| return segments; |
| } |
| nameChar = name.charAt(iName); |
| // test if the name character is uppercase |
| if (nameChar < ScannerHelper.MAX_OBVIOUS) { |
| if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar] & ScannerHelper.C_UPPER_LETTER) != 0) { |
| return null; |
| } |
| } |
| else if (!Character.isJavaIdentifierPart(nameChar) || Character.isUpperCase(nameChar)) { |
| return null; |
| } |
| iName++; |
| } |
| } |
| |
| if (iName == nameEnd){ |
| // We have exhausted the name (and not the pattern), so it's not a match |
| return null; |
| } |
| |
| // For as long as we're exactly matching, bring it on (even if it's a lower case character) |
| if ((patternChar = pattern.charAt(iPattern)) == name.charAt(iName)) { |
| continue; |
| } |
| int segmentEnd = iName; |
| |
| // If characters are not equals, then it's not a match if patternChar is lowercase |
| if (patternChar < ScannerHelper.MAX_OBVIOUS) { |
| if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[patternChar] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_DIGIT)) == 0) { |
| return null; |
| } |
| } else if (Character.isJavaIdentifierPart(patternChar) && !Character.isUpperCase(patternChar) && !Character.isDigit(patternChar)) { |
| return null; |
| } |
| |
| // patternChar is uppercase, so let's find the next uppercase in name |
| while (true) { |
| if (iName == nameEnd){ |
| // We have exhausted name (and not pattern), so it's not a match |
| return null; |
| } |
| |
| nameChar = name.charAt(iName); |
| if (nameChar < ScannerHelper.MAX_OBVIOUS) { |
| int charNature = ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[nameChar]; |
| if ((charNature & (ScannerHelper.C_LOWER_LETTER | ScannerHelper.C_SPECIAL)) != 0) { |
| // nameChar is lowercase |
| iName++; |
| } else if ((charNature & ScannerHelper.C_DIGIT) != 0) { |
| // nameChar is digit => break if the digit is current pattern character otherwise consume it |
| if (patternChar == nameChar) break; |
| iName++; |
| // nameChar is uppercase... |
| } else if (patternChar != nameChar) { |
| //.. and it does not match patternChar, so it's not a match |
| return null; |
| } else { |
| //.. and it matched patternChar. Back to the big loop |
| break; |
| } |
| } |
| // Same tests for non-obvious characters |
| else if (Character.isJavaIdentifierPart(nameChar) && !Character.isUpperCase(nameChar)) { |
| iName++; |
| } else if (Character.isDigit(nameChar)) { |
| if (patternChar == nameChar) break; |
| iName++; |
| } else if (patternChar != nameChar) { |
| return null; |
| } else { |
| break; |
| } |
| } |
| // At this point, either name has been exhausted, or it is at an uppercase letter. |
| // Since pattern is also at an uppercase letter |
| if (segments == null) { |
| segments = new int[parts*2]; |
| } |
| segments[count++] = segmentStart; |
| segments[count++] = segmentEnd - segmentStart; |
| segmentStart = iName; |
| } |
| } |
| |
| /** |
| * Answers all the regions in a given name matching a given <i>pattern</i> |
| * pattern (e.g. "H*M??"). |
| * </p><p> |
| * Each of these regions is made of its starting index and its length in the given |
| * name. They are all concatenated in a single array of <code>int</code> |
| * which therefore always has an even length. |
| * </p><p> |
| * Note that each region is disjointed from the following one.<br> |
| * E.g. if the regions are <code>{ start1, length1, start2, length2 }</code>, |
| * then <code>start1+length1</code> will always be smaller than |
| * <code>start2</code>. |
| * </p><p> |
| * <pre> |
| * Examples: |
| * <ol> |
| * <li> pattern = "N???Po*Ex?eption" |
| * name = NullPointerException |
| * result: { 0, 1, 4, 2, 11, 2, 14, 6 }</li> |
| * <li> pattern = "Ha*M*ent*" |
| * name = "HashMapEntry" |
| * result: { 0, 2, 4, 1, 7, 3 }</li> |
| * </ol></pre> |
| *</p> |
| * @see CharOperation#match(char[], char[], boolean) for more details on the |
| * pattern match behavior |
| * |
| * @param pattern the given pattern |
| * @param patternStart the given pattern start |
| * @param patternEnd the given pattern end |
| * @param name the given name |
| * @param nameStart the given name start |
| * @param nameEnd the given name end |
| * @param isCaseSensitive flag to know if the matching should be case sensitive |
| * @return an array of <code>int</code> having two slots per returned |
| * regions (first one is the starting index of the region and the second |
| * one the length of the region).<br> |
| * Note that it may be <code>null</code> if the given name does not match |
| * the pattern |
| * @since 3.5 |
| */ |
| public static final int[] getPatternMatchingRegions( |
| String pattern, |
| int patternStart, |
| int patternEnd, |
| String name, |
| int nameStart, |
| int nameEnd, |
| boolean isCaseSensitive) { |
| |
| /* !!!!!!!!!! WARNING !!!!!!!!!! |
| * The algorithm used in this method has been fully inspired from |
| * CharOperation#match(char[], int, int, char[], int, int, boolean). |
| * |
| * So, if any change needs to be applied in the algorithm, do NOT forget |
| * to backport it in the CharOperation method! |
| */ |
| |
| if (name == null) return null; // null name cannot match |
| if (pattern == null) { |
| // null pattern cannot match any region |
| // see bug https://bugs.eclipse.org/bugs/show_bug.cgi?id=264816 |
| return EMPTY_REGIONS; |
| } |
| int iPattern = patternStart; |
| int iName = nameStart; |
| |
| // init segments parts |
| if (patternEnd < 0) |
| patternEnd = pattern.length(); |
| if (nameEnd < 0) |
| nameEnd = name.length(); |
| int questions = 0; |
| int stars = 0; |
| int parts = 0; |
| char previous = 0; |
| for (int i=patternStart; i<patternEnd; i++) { |
| char ch = pattern.charAt(i); |
| switch (ch) { |
| case '?': |
| questions++; |
| break; |
| case '*': |
| stars++; |
| break; |
| default: |
| switch (previous) { |
| case 0 : |
| case '?': |
| case '*': |
| parts++; |
| break; |
| } |
| } |
| previous = ch; |
| } |
| if (parts == 0) { |
| if (questions <= (nameEnd - nameStart)) return EMPTY_REGIONS; |
| return null; |
| } |
| int[] segments = new int[parts*2]; |
| |
| /* check first segment */ |
| int count = 0; |
| int start = iName; |
| char patternChar = 0; |
| previous = 0; |
| while ((iPattern < patternEnd) |
| && (patternChar = pattern.charAt(iPattern)) != '*') { |
| if (iName == nameEnd) |
| return null; |
| if (patternChar == '?') { |
| switch (previous) { |
| case 0: |
| case '?': |
| break; |
| default: |
| segments[count++] = start; |
| segments[count++] = iPattern-start; |
| break; |
| } |
| } else { |
| if (isCaseSensitive) { |
| if (patternChar != name.charAt(iName)) { |
| return null; |
| } |
| } else if (ScannerHelper.toLowerCase(patternChar) != ScannerHelper.toLowerCase(name.charAt(iName))) { |
| return null; |
| } |
| switch (previous) { |
| case 0: |
| case '?': |
| start = iPattern; |
| break; |
| } |
| } |
| iName++; |
| iPattern++; |
| previous = patternChar; |
| } |
| /* check sequence of star+segment */ |
| int segmentStart; |
| if (patternChar == '*') { |
| if (iPattern > 0 && previous != '?') { |
| segments[count++] = start; |
| segments[count++] = iName-start; |
| start = iName; |
| } |
| segmentStart = ++iPattern; // skip star |
| } else { |
| if (iName == nameEnd) { |
| if (count == (parts*2)) return segments; |
| int end = patternEnd; |
| if (previous == '?') { // last char was a '?' => purge all trailing '?' |
| while (pattern.charAt(--end-1) == '?') { |
| if (end == start) { |
| return new int[] { patternStart, patternEnd-patternStart }; |
| } |
| } |
| } |
| return new int[] { start, end-start }; |
| } |
| return null; |
| } |
| int prefixStart = iName; |
| int previousCount = count; |
| previous = patternChar; |
| char previousSegment = patternChar; |
| checkSegment : while (iName < nameEnd) { |
| if (iPattern == patternEnd) { |
| iPattern = segmentStart; // mismatch - restart current segment |
| iName = ++prefixStart; |
| previous = previousSegment; |
| continue checkSegment; |
| } |
| /* segment is ending */ |
| if ((patternChar = pattern.charAt(iPattern)) == '*') { |
| segmentStart = ++iPattern; // skip star |
| if (segmentStart == patternEnd) { |
| if (count < (parts*2)) { |
| segments[count++] = start; |
| segments[count++] = iName-start; |
| } |
| return segments; |
| } |
| switch (previous) { |
| case '*': |
| case '?': |
| break; |
| default: |
| segments[count++] = start; |
| segments[count++] = iName-start; |
| break; |
| } |
| prefixStart = iName; |
| start = prefixStart; |
| previous = patternChar; |
| previousSegment = patternChar; |
| continue checkSegment; |
| } |
| /* check current name character */ |
| previousCount = count; |
| if (patternChar == '?') { |
| switch (previous) { |
| case '*': |
| case '?': |
| break; |
| default: |
| segments[count++] = start; |
| segments[count++] = iName-start; |
| break; |
| } |
| } else { |
| boolean mismatch; |
| if (isCaseSensitive) { |
| mismatch = name.charAt(iName) != patternChar; |
| } else { |
| mismatch = ScannerHelper.toLowerCase(name.charAt(iName)) != ScannerHelper.toLowerCase(patternChar); |
| } |
| if (mismatch) { |
| iPattern = segmentStart; // mismatch - restart current segment |
| iName = ++prefixStart; |
| start = prefixStart; |
| count = previousCount; |
| previous = previousSegment; |
| continue checkSegment; |
| } |
| switch (previous) { |
| case '?': |
| start = iName; |
| break; |
| } |
| } |
| iName++; |
| iPattern++; |
| previous = patternChar; |
| } |
| |
| if ((segmentStart == patternEnd) |
| || (iName == nameEnd && iPattern == patternEnd) |
| || (iPattern == patternEnd - 1 && pattern.charAt(iPattern) == '*')) { |
| if (count < (parts*2)) { |
| segments[count++] = start; |
| segments[count++] = iName-start; |
| } |
| return segments; |
| } |
| return null; |
| } |
| } |