| /******************************************************************************* |
| * Copyright (c) 2000, 2008 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.search.internal.core.text; |
| |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| |
| import org.eclipse.jface.text.FindReplaceDocumentAdapter; |
| |
| import org.eclipse.search.internal.ui.SearchMessages; |
| |
| /** |
| * |
| */ |
| public class PatternConstructor { |
| |
| |
| private PatternConstructor() { |
| // don't instantiate |
| } |
| |
| public static Pattern createPattern(String pattern, boolean isCaseSensitive, boolean isRegex) throws PatternSyntaxException { |
| return createPattern(pattern, isRegex, true, isCaseSensitive, false); |
| } |
| |
| /** |
| * Creates a pattern element from the pattern string which is either a reg-ex expression or in our old |
| * 'StringMatcher' format. |
| * @param pattern The search pattern |
| * @param isRegex <code>true</code> if the passed string already is a reg-ex pattern |
| * @param isStringMatcher <code>true</code> if the passed string is in the StringMatcher format. |
| * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern |
| * @param isWholeWord <code>true</code> to create a pattern that requires a word boundary at the beginning and the end. |
| * @return The created pattern |
| * @throws PatternSyntaxException |
| */ |
| public static Pattern createPattern(String pattern, boolean isRegex, boolean isStringMatcher, boolean isCaseSensitive, boolean isWholeWord) throws PatternSyntaxException { |
| if (isRegex) { |
| pattern= substituteLinebreak(pattern); |
| if (isWholeWord) { |
| StringBuffer buffer= new StringBuffer(pattern.length() + 10); |
| buffer.append("\\b(?:").append(pattern).append(")\\b"); //$NON-NLS-1$ //$NON-NLS-2$ |
| pattern= buffer.toString(); |
| } |
| } else { |
| int len= pattern.length(); |
| StringBuffer buffer= new StringBuffer(len + 10); |
| // don't add a word boundary if the search text does not start with |
| // a word char. (this works around a user input error). |
| if (isWholeWord && len > 0 && isWordChar(pattern.charAt(0))) { |
| buffer.append("\\b"); //$NON-NLS-1$ |
| } |
| appendAsRegEx(isStringMatcher, pattern, buffer); |
| if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) { |
| buffer.append("\\b"); //$NON-NLS-1$ |
| } |
| pattern= buffer.toString(); |
| } |
| |
| int regexOptions= Pattern.MULTILINE; |
| if (!isCaseSensitive) { |
| regexOptions|= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; |
| } |
| return Pattern.compile(pattern, regexOptions); |
| } |
| |
| /** |
| * Copied from {@link org.eclipse.jface.text.FindReplaceDocumentAdapter}' to support '\R' |
| * @param findString the string to substitute |
| * @return the new string |
| * @throws PatternSyntaxException |
| */ |
| private static String substituteLinebreak(String findString) throws PatternSyntaxException { |
| int length= findString.length(); |
| StringBuffer buf= new StringBuffer(length); |
| |
| int inCharGroup= 0; |
| int inBraces= 0; |
| boolean inQuote= false; |
| for (int i= 0; i < length; i++) { |
| char ch= findString.charAt(i); |
| switch (ch) { |
| case '[': |
| buf.append(ch); |
| if (! inQuote) |
| inCharGroup++; |
| break; |
| |
| case ']': |
| buf.append(ch); |
| if (! inQuote) |
| inCharGroup--; |
| break; |
| |
| case '{': |
| buf.append(ch); |
| if (! inQuote && inCharGroup == 0) |
| inBraces++; |
| break; |
| |
| case '}': |
| buf.append(ch); |
| if (! inQuote && inCharGroup == 0) |
| inBraces--; |
| break; |
| |
| case '\\': |
| if (i + 1 < length) { |
| char ch1= findString.charAt(i + 1); |
| if (inQuote) { |
| if (ch1 == 'E') |
| inQuote= false; |
| buf.append(ch).append(ch1); |
| i++; |
| |
| } else if (ch1 == 'R') { |
| if (inCharGroup > 0 || inBraces > 0) { |
| String msg= SearchMessages.PatternConstructor_error_line_delim_position; |
| throw new PatternSyntaxException(msg, findString, i); |
| } |
| buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$ |
| i++; |
| |
| } else { |
| if (ch1 == 'Q') { |
| inQuote= true; |
| } |
| buf.append(ch).append(ch1); |
| i++; |
| } |
| } else { |
| buf.append(ch); |
| } |
| break; |
| |
| default: |
| buf.append(ch); |
| break; |
| } |
| |
| } |
| return buf.toString(); |
| } |
| |
| |
| private static boolean isWordChar(char c) { |
| return Character.isLetterOrDigit(c); |
| } |
| |
| /** |
| * Creates a pattern element from an array of patterns in the old |
| * 'StringMatcher' format. |
| * @param patterns The search patterns |
| * @param isCaseSensitive Set to <code>true</code> to create a case insensitive pattern |
| * @return The created pattern |
| * @throws PatternSyntaxException |
| */ |
| public static Pattern createPattern(String[] patterns, boolean isCaseSensitive) throws PatternSyntaxException { |
| StringBuffer pattern= new StringBuffer(); |
| for (int i= 0; i < patterns.length; i++) { |
| if (i > 0) { |
| // note that this works only as we know that the operands of the |
| // or expression will be simple and need no brackets. |
| pattern.append('|'); |
| } |
| appendAsRegEx(true, patterns[i], pattern); |
| } |
| return createPattern(pattern.toString(), true, true, isCaseSensitive, false); |
| } |
| |
| |
| public static StringBuffer appendAsRegEx(boolean isStringMatcher, String pattern, StringBuffer buffer) { |
| boolean isEscaped= false; |
| for (int i = 0; i < pattern.length(); i++) { |
| char c = pattern.charAt(i); |
| switch(c) { |
| // the backslash |
| case '\\': |
| // the backslash is escape char in string matcher |
| if (isStringMatcher && !isEscaped) { |
| isEscaped= true; |
| } |
| else { |
| buffer.append("\\\\"); //$NON-NLS-1$ |
| isEscaped= false; |
| } |
| break; |
| // characters that need to be escaped in the regex. |
| case '(': |
| case ')': |
| case '{': |
| case '}': |
| case '.': |
| case '[': |
| case ']': |
| case '$': |
| case '^': |
| case '+': |
| case '|': |
| if (isEscaped) { |
| buffer.append("\\\\"); //$NON-NLS-1$ |
| isEscaped= false; |
| } |
| buffer.append('\\'); |
| buffer.append(c); |
| break; |
| case '?': |
| if (isStringMatcher && !isEscaped) { |
| buffer.append('.'); |
| } |
| else { |
| buffer.append('\\'); |
| buffer.append(c); |
| isEscaped= false; |
| } |
| break; |
| case '*': |
| if (isStringMatcher && !isEscaped) { |
| buffer.append(".*"); //$NON-NLS-1$ |
| } |
| else { |
| buffer.append('\\'); |
| buffer.append(c); |
| isEscaped= false; |
| } |
| break; |
| default: |
| if (isEscaped) { |
| buffer.append("\\\\"); //$NON-NLS-1$ |
| isEscaped= false; |
| } |
| buffer.append(c); |
| break; |
| } |
| } |
| if (isEscaped) { |
| buffer.append("\\\\"); //$NON-NLS-1$ |
| isEscaped= false; |
| } |
| return buffer; |
| } |
| |
| /** |
| * Interprets escaped characters in the given replace pattern. |
| * |
| * @param replaceText the replace pattern |
| * @param foundText the found pattern to be replaced |
| * @param lineDelim the line delimiter to use for \R |
| * @return a replace pattern with escaped characters substituted by the respective characters |
| * @since 3.4 |
| */ |
| public static String interpretReplaceEscapes(String replaceText, String foundText, String lineDelim) { |
| return new ReplaceStringConstructor(lineDelim).interpretReplaceEscapes(replaceText, foundText); |
| } |
| |
| /** |
| * Copied from {@link FindReplaceDocumentAdapter}} |
| * |
| * FindReplaceDocumentAdapter with contributions from: |
| * Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949 |
| * Cagatay Calli <ccalli@gmail.com> - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061 |
| */ |
| private static class ReplaceStringConstructor { |
| |
| private static final int RC_MIXED= 0; |
| private static final int RC_UPPER= 1; |
| private static final int RC_LOWER= 2; |
| private static final int RC_FIRSTUPPER= 3; |
| |
| |
| private int fRetainCaseMode; |
| private final String fLineDelim; |
| |
| public ReplaceStringConstructor(String lineDelim) { |
| fLineDelim= lineDelim; |
| |
| } |
| |
| /** |
| * Interprets escaped characters in the given replace pattern. |
| * |
| * @param replaceText the replace pattern |
| * @param foundText the found pattern to be replaced |
| * @return a replace pattern with escaped characters substituted by the respective characters |
| * @since 3.4 |
| */ |
| private String interpretReplaceEscapes(String replaceText, String foundText) { |
| int length= replaceText.length(); |
| boolean inEscape= false; |
| StringBuffer buf= new StringBuffer(length); |
| |
| /* every string we did not check looks mixed at first |
| * so initialize retain case mode with RC_MIXED |
| */ |
| fRetainCaseMode= RC_MIXED; |
| |
| for (int i= 0; i < length; i++) { |
| final char ch= replaceText.charAt(i); |
| if (inEscape) { |
| i= interpretReplaceEscape(ch, i, buf, replaceText, foundText); |
| inEscape= false; |
| |
| } else if (ch == '\\') { |
| inEscape= true; |
| |
| } else if (ch == '$') { |
| buf.append(ch); |
| |
| /* |
| * Feature in java.util.regex.Matcher#replaceFirst(String): |
| * $00, $000, etc. are interpreted as $0 and |
| * $01, $001, etc. are interpreted as $1, etc. . |
| * If we support \0 as replacement pattern for capturing group 0, |
| * it would not be possible any more to write a replacement pattern |
| * that appends 0 to a capturing group (like $0\0). |
| * The fix is to interpret \00 and $00 as $0\0, and |
| * \01 and $01 as $0\1, etc. |
| */ |
| if (i + 2 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| char ch2= replaceText.charAt(i + 2); |
| if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') { |
| buf.append("0\\"); //$NON-NLS-1$ |
| i++; // consume the 0 |
| } |
| } |
| } else { |
| interpretRetainCase(buf, ch); |
| } |
| } |
| |
| if (inEscape) { |
| // '\' as last character is invalid, but we still add it to get an error message |
| buf.append('\\'); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Interprets the escaped character <code>ch</code> at offset <code>i</code> |
| * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>. |
| * |
| * @param ch the escaped character |
| * @param i the offset |
| * @param buf the output buffer |
| * @param replaceText the original replace pattern |
| * @param foundText the found pattern to be replaced |
| * @return the new offset |
| * @since 3.4 |
| */ |
| private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) { |
| int length= replaceText.length(); |
| switch (ch) { |
| case 'r': |
| buf.append('\r'); |
| break; |
| case 'n': |
| buf.append('\n'); |
| break; |
| case 't': |
| buf.append('\t'); |
| break; |
| case 'f': |
| buf.append('\f'); |
| break; |
| case 'a': |
| buf.append('\u0007'); |
| break; |
| case 'e': |
| buf.append('\u001B'); |
| break; |
| case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries |
| buf.append(fLineDelim); |
| break; |
| /* |
| * \0 for octal is not supported in replace string, since it |
| * would conflict with capturing group \0, etc. |
| */ |
| case '0': |
| buf.append('$').append(ch); |
| /* |
| * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)" |
| * in interpretReplaceEscape(String) above. |
| */ |
| if (i + 1 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| if ('0' <= ch1 && ch1 <= '9') { |
| buf.append('\\'); |
| } |
| } |
| break; |
| |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| buf.append('$').append(ch); |
| break; |
| |
| case 'c': |
| if (i + 1 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| interpretRetainCase(buf, (char)(ch1 ^ 64)); |
| i++; |
| } else { |
| String msg= SearchMessages.PatternConstructor_error_escape_sequence; |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'x': |
| if (i + 2 < length) { |
| int parsedInt; |
| try { |
| parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 3), 16); |
| if (parsedInt < 0) |
| throw new NumberFormatException(); |
| } catch (NumberFormatException e) { |
| String msg= SearchMessages.PatternConstructor_error_hex_escape_sequence; |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| interpretRetainCase(buf, (char) parsedInt); |
| i+= 2; |
| } else { |
| String msg= SearchMessages.PatternConstructor_error_hex_escape_sequence; |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'u': |
| if (i + 4 < length) { |
| int parsedInt; |
| try { |
| parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 5), 16); |
| if (parsedInt < 0) |
| throw new NumberFormatException(); |
| } catch (NumberFormatException e) { |
| String msg= SearchMessages.PatternConstructor_error_unicode_escape_sequence; |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| interpretRetainCase(buf, (char) parsedInt); |
| i+= 4; |
| } else { |
| String msg= SearchMessages.PatternConstructor_error_unicode_escape_sequence; |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'C': |
| if(foundText.toUpperCase().equals(foundText)) // is whole match upper-case? |
| fRetainCaseMode= RC_UPPER; |
| else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case? |
| fRetainCaseMode= RC_LOWER; |
| else if(Character.isUpperCase(foundText.charAt(0))) // is first character upper-case? |
| fRetainCaseMode= RC_FIRSTUPPER; |
| else |
| fRetainCaseMode= RC_MIXED; |
| break; |
| |
| default: |
| // unknown escape k: append uninterpreted \k |
| buf.append('\\').append(ch); |
| break; |
| } |
| return i; |
| } |
| |
| /** |
| * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed) |
| * and appends the character <code>ch</code> to <code>buf</code> after processing. |
| * |
| * @param buf the output buffer |
| * @param ch the character to process |
| * @since 3.4 |
| */ |
| private void interpretRetainCase(StringBuffer buf, char ch) { |
| if (fRetainCaseMode == RC_UPPER) |
| buf.append(Character.toUpperCase(ch)); |
| else if (fRetainCaseMode == RC_LOWER) |
| buf.append(Character.toLowerCase(ch)); |
| else if (fRetainCaseMode == RC_FIRSTUPPER) { |
| buf.append(Character.toUpperCase(ch)); |
| fRetainCaseMode= RC_MIXED; |
| } else |
| buf.append(ch); |
| } |
| |
| } |
| } |