| /******************************************************************************* |
| * Copyright (c) 2000, 2008 IBM Corporation and others. |
| * All rights reserved. This program and the accompanying materials |
| * are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at |
| * http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: |
| * IBM Corporation - initial API and implementation |
| * Cagatay Calli <ccalli@gmail.com> - [find/replace] retain caps when replacing - https://bugs.eclipse.org/bugs/show_bug.cgi?id=28949 |
| * Cagatay Calli <ccalli@gmail.com> - [find/replace] define & fix behavior of retain caps with other escapes and text before \C - https://bugs.eclipse.org/bugs/show_bug.cgi?id=217061 |
| *******************************************************************************/ |
| package org.eclipse.jface.text; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| |
| import org.eclipse.core.runtime.Assert; |
| |
| |
| /** |
| * Provides search and replace operations on |
| * {@link org.eclipse.jface.text.IDocument}. |
| * <p> |
| * Replaces |
| * {@link org.eclipse.jface.text.IDocument#search(int, String, boolean, boolean, boolean)}. |
| * |
| * @since 3.0 |
| */ |
| public class FindReplaceDocumentAdapter implements CharSequence { |
| |
| /** |
| * Internal type for operation codes. |
| */ |
| private static class FindReplaceOperationCode { |
| } |
| |
| // Find/replace operation codes. |
| private static final FindReplaceOperationCode FIND_FIRST= new FindReplaceOperationCode(); |
| private static final FindReplaceOperationCode FIND_NEXT= new FindReplaceOperationCode(); |
| private static final FindReplaceOperationCode REPLACE= new FindReplaceOperationCode(); |
| private static final FindReplaceOperationCode REPLACE_FIND_NEXT= new FindReplaceOperationCode(); |
| |
| /** |
| * Retain case mode constants. |
| * @since 3.4 |
| */ |
| private static final int RC_MIXED= 0; |
| private static final int RC_UPPER= 1; |
| private static final int RC_LOWER= 2; |
| private static final int RC_FIRSTUPPER= 3; |
| |
| |
| /** |
| * The adapted document. |
| */ |
| private IDocument fDocument; |
| |
| /** |
| * State for findReplace. |
| */ |
| private FindReplaceOperationCode fFindReplaceState= null; |
| |
| /** |
| * The matcher used in findReplace. |
| */ |
| private Matcher fFindReplaceMatcher; |
| |
| /** |
| * The match offset from the last findReplace call. |
| */ |
| private int fFindReplaceMatchOffset; |
| |
| /** |
| * Retain case mode |
| */ |
| private int fRetainCaseMode; |
| |
| /** |
| * Constructs a new find replace document adapter. |
| * |
| * @param document the adapted document |
| */ |
| public FindReplaceDocumentAdapter(IDocument document) { |
| Assert.isNotNull(document); |
| fDocument= document; |
| } |
| |
| /** |
| * Returns the location of a given string in this adapter's document based on a set of search criteria. |
| * |
| * @param startOffset document offset at which search starts |
| * @param findString the string to find |
| * @param forwardSearch the search direction |
| * @param caseSensitive indicates whether lower and upper case should be distinguished |
| * @param wholeWord indicates whether the findString should be limited by white spaces as |
| * defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>. |
| * @param regExSearch if <code>true</code> findString represents a regular expression |
| * Must not be used in combination with <code>wholeWord</code>. |
| * @return the find or replace region or <code>null</code> if there was no match |
| * @throws BadLocationException if startOffset is an invalid document offset |
| * @throws PatternSyntaxException if a regular expression has invalid syntax |
| */ |
| public IRegion find(int startOffset, String findString, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException { |
| Assert.isTrue(!(regExSearch && wholeWord)); |
| |
| // Adjust offset to special meaning of -1 |
| if (startOffset == -1 && forwardSearch) |
| startOffset= 0; |
| if (startOffset == -1 && !forwardSearch) |
| startOffset= length() - 1; |
| |
| return findReplace(FIND_FIRST, startOffset, findString, null, forwardSearch, caseSensitive, wholeWord, regExSearch); |
| } |
| |
| /** |
| * Stateful findReplace executes a FIND, REPLACE, REPLACE_FIND or FIND_FIRST operation. |
| * In case of REPLACE and REPLACE_FIND it sends a <code>DocumentEvent</code> to all |
| * registered <code>IDocumentListener</code>. |
| * |
| * @param startOffset document offset at which search starts |
| * this value is only used in the FIND_FIRST operation and otherwise ignored |
| * @param findString the string to find |
| * this value is only used in the FIND_FIRST operation and otherwise ignored |
| * @param replaceText the string to replace the current match |
| * this value is only used in the REPLACE and REPLACE_FIND operations and otherwise ignored |
| * @param forwardSearch the search direction |
| * @param caseSensitive indicates whether lower and upper case should be distinguished |
| * @param wholeWord indicates whether the findString should be limited by white spaces as |
| * defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>. |
| * @param regExSearch if <code>true</code> this operation represents a regular expression |
| * Must not be used in combination with <code>wholeWord</code>. |
| * @param operationCode specifies what kind of operation is executed |
| * @return the find or replace region or <code>null</code> if there was no match |
| * @throws BadLocationException if startOffset is an invalid document offset |
| * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation |
| * @throws PatternSyntaxException if a regular expression has invalid syntax |
| */ |
| private IRegion findReplace(final FindReplaceOperationCode operationCode, int startOffset, String findString, String replaceText, boolean forwardSearch, boolean caseSensitive, boolean wholeWord, boolean regExSearch) throws BadLocationException { |
| |
| // Validate option combinations |
| Assert.isTrue(!(regExSearch && wholeWord)); |
| |
| // Validate state |
| if ((operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) && (fFindReplaceState != FIND_FIRST && fFindReplaceState != FIND_NEXT)) |
| throw new IllegalStateException("illegal findReplace state: cannot replace without preceding find"); //$NON-NLS-1$ |
| |
| if (operationCode == FIND_FIRST) { |
| // Reset |
| |
| if (findString == null || findString.length() == 0) |
| return null; |
| |
| // Validate start offset |
| if (startOffset < 0 || startOffset >= length()) |
| throw new BadLocationException(); |
| |
| int patternFlags= 0; |
| |
| if (regExSearch) { |
| patternFlags |= Pattern.MULTILINE; |
| findString= substituteLinebreak(findString); |
| } |
| |
| if (!caseSensitive) |
| patternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; |
| |
| if (wholeWord) |
| findString= "\\b" + findString + "\\b"; //$NON-NLS-1$ //$NON-NLS-2$ |
| |
| if (!regExSearch && !wholeWord) |
| findString= asRegPattern(findString); |
| |
| fFindReplaceMatchOffset= startOffset; |
| if (fFindReplaceMatcher != null && fFindReplaceMatcher.pattern().pattern().equals(findString) && fFindReplaceMatcher.pattern().flags() == patternFlags) { |
| /* |
| * Commented out for optimization: |
| * The call is not needed since FIND_FIRST uses find(int) which resets the matcher |
| */ |
| // fFindReplaceMatcher.reset(); |
| } else { |
| Pattern pattern= Pattern.compile(findString, patternFlags); |
| fFindReplaceMatcher= pattern.matcher(this); |
| } |
| } |
| |
| // Set state |
| fFindReplaceState= operationCode; |
| |
| if (operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT) { |
| if (regExSearch) { |
| Pattern pattern= fFindReplaceMatcher.pattern(); |
| String prevMatch= fFindReplaceMatcher.group(); |
| try { |
| replaceText= interpretReplaceEscapes(replaceText, prevMatch); |
| Matcher replaceTextMatcher= pattern.matcher(prevMatch); |
| replaceText= replaceTextMatcher.replaceFirst(replaceText); |
| } catch (IndexOutOfBoundsException ex) { |
| throw new PatternSyntaxException(ex.getLocalizedMessage(), replaceText, -1); |
| } |
| } |
| |
| int offset= fFindReplaceMatcher.start(); |
| int length= fFindReplaceMatcher.group().length(); |
| |
| if (fDocument instanceof IRepairableDocumentExtension |
| && ((IRepairableDocumentExtension)fDocument).isLineInformationRepairNeeded(offset, length, replaceText)) { |
| String message= TextMessages.getString("FindReplaceDocumentAdapter.incompatibleLineDelimiter"); //$NON-NLS-1$ |
| throw new PatternSyntaxException(message, replaceText, offset); |
| } |
| |
| fDocument.replace(offset, length, replaceText); |
| |
| if (operationCode == REPLACE) { |
| return new Region(offset, replaceText.length()); |
| } |
| } |
| |
| if (operationCode != REPLACE) { |
| try { |
| if (forwardSearch) { |
| |
| boolean found= false; |
| if (operationCode == FIND_FIRST) |
| found= fFindReplaceMatcher.find(startOffset); |
| else |
| found= fFindReplaceMatcher.find(); |
| |
| if (operationCode == REPLACE_FIND_NEXT) |
| fFindReplaceState= FIND_NEXT; |
| |
| if (found && fFindReplaceMatcher.group().length() > 0) |
| return new Region(fFindReplaceMatcher.start(), fFindReplaceMatcher.group().length()); |
| return null; |
| } |
| // backward search |
| boolean found= fFindReplaceMatcher.find(0); |
| int index= -1; |
| int length= -1; |
| while (found && fFindReplaceMatcher.start() + fFindReplaceMatcher.group().length() <= fFindReplaceMatchOffset + 1) { |
| index= fFindReplaceMatcher.start(); |
| length= fFindReplaceMatcher.group().length(); |
| found= fFindReplaceMatcher.find(index + 1); |
| } |
| fFindReplaceMatchOffset= index; |
| if (index > -1) { |
| // must set matcher to correct position |
| fFindReplaceMatcher.find(index); |
| return new Region(index, length); |
| } |
| return null; |
| } catch (StackOverflowError e) { |
| String message= TextMessages.getString("FindReplaceDocumentAdapter.patternTooComplex"); //$NON-NLS-1$ |
| throw new PatternSyntaxException(message, findString, -1); |
| } |
| } |
| |
| return null; |
| } |
| |
| /** |
| * Substitutes \R in a regex find pattern with (?>\r\n?|\n) |
| * |
| * @param findString the original find pattern |
| * @return the transformed find pattern |
| * @throws PatternSyntaxException if \R is added at an illegal position (e.g. in a character set) |
| * @since 3.4 |
| */ |
| private String substituteLinebreak(String findString) throws PatternSyntaxException { |
| int length= findString.length(); |
| StringBuffer buf= new StringBuffer(length); |
| |
| int inCharGroup= 0; |
| int inBraces= 0; |
| boolean inQuote= false; |
| for (int i= 0; i < length; i++) { |
| char ch= findString.charAt(i); |
| switch (ch) { |
| case '[': |
| buf.append(ch); |
| if (! inQuote) |
| inCharGroup++; |
| break; |
| |
| case ']': |
| buf.append(ch); |
| if (! inQuote) |
| inCharGroup--; |
| break; |
| |
| case '{': |
| buf.append(ch); |
| if (! inQuote && inCharGroup == 0) |
| inBraces++; |
| break; |
| |
| case '}': |
| buf.append(ch); |
| if (! inQuote && inCharGroup == 0) |
| inBraces--; |
| break; |
| |
| case '\\': |
| if (i + 1 < length) { |
| char ch1= findString.charAt(i + 1); |
| if (inQuote) { |
| if (ch1 == 'E') |
| inQuote= false; |
| buf.append(ch).append(ch1); |
| i++; |
| |
| } else if (ch1 == 'R') { |
| if (inCharGroup > 0 || inBraces > 0) { |
| String msg= TextMessages.getString("FindReplaceDocumentAdapter.illegalLinebreak"); //$NON-NLS-1$ |
| throw new PatternSyntaxException(msg, findString, i); |
| } |
| buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$ |
| i++; |
| |
| } else { |
| if (ch1 == 'Q') { |
| inQuote= true; |
| } |
| buf.append(ch).append(ch1); |
| i++; |
| } |
| } else { |
| buf.append(ch); |
| } |
| break; |
| |
| default: |
| buf.append(ch); |
| break; |
| } |
| |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Interprets current Retain Case mode (all upper-case,all lower-case,capitalized or mixed) |
| * and appends the character <code>ch</code> to <code>buf</code> after processing. |
| * |
| * @param buf the output buffer |
| * @param ch the character to process |
| * @since 3.4 |
| */ |
| private void interpretRetainCase(StringBuffer buf, char ch) { |
| if (fRetainCaseMode == RC_UPPER) |
| buf.append(Character.toUpperCase(ch)); |
| else if (fRetainCaseMode == RC_LOWER) |
| buf.append(Character.toLowerCase(ch)); |
| else if (fRetainCaseMode == RC_FIRSTUPPER) { |
| buf.append(Character.toUpperCase(ch)); |
| fRetainCaseMode= RC_MIXED; |
| } else |
| buf.append(ch); |
| } |
| |
| /** |
| * Interprets escaped characters in the given replace pattern. |
| * |
| * @param replaceText the replace pattern |
| * @param foundText the found pattern to be replaced |
| * @return a replace pattern with escaped characters substituted by the respective characters |
| * @since 3.4 |
| */ |
| private String interpretReplaceEscapes(String replaceText, String foundText) { |
| int length= replaceText.length(); |
| boolean inEscape= false; |
| StringBuffer buf= new StringBuffer(length); |
| |
| /* every string we did not check looks mixed at first |
| * so initialize retain case mode with RC_MIXED |
| */ |
| fRetainCaseMode= RC_MIXED; |
| |
| for (int i= 0; i < length; i++) { |
| final char ch= replaceText.charAt(i); |
| if (inEscape) { |
| i= interpretReplaceEscape(ch, i, buf, replaceText, foundText); |
| inEscape= false; |
| |
| } else if (ch == '\\') { |
| inEscape= true; |
| |
| } else if (ch == '$') { |
| buf.append(ch); |
| |
| /* |
| * Feature in java.util.regex.Matcher#replaceFirst(String): |
| * $00, $000, etc. are interpreted as $0 and |
| * $01, $001, etc. are interpreted as $1, etc. . |
| * If we support \0 as replacement pattern for capturing group 0, |
| * it would not be possible any more to write a replacement pattern |
| * that appends 0 to a capturing group (like $0\0). |
| * The fix is to interpret \00 and $00 as $0\0, and |
| * \01 and $01 as $0\1, etc. |
| */ |
| if (i + 2 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| char ch2= replaceText.charAt(i + 2); |
| if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') { |
| buf.append("0\\"); //$NON-NLS-1$ |
| i++; // consume the 0 |
| } |
| } |
| } else { |
| interpretRetainCase(buf, ch); |
| } |
| } |
| |
| if (inEscape) { |
| // '\' as last character is invalid, but we still add it to get an error message |
| buf.append('\\'); |
| } |
| return buf.toString(); |
| } |
| |
| /** |
| * Interprets the escaped character <code>ch</code> at offset <code>i</code> |
| * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>. |
| * |
| * @param ch the escaped character |
| * @param i the offset |
| * @param buf the output buffer |
| * @param replaceText the original replace pattern |
| * @param foundText the found pattern to be replaced |
| * @return the new offset |
| * @since 3.4 |
| */ |
| private int interpretReplaceEscape(final char ch, int i, StringBuffer buf, String replaceText, String foundText) { |
| int length= replaceText.length(); |
| switch (ch) { |
| case 'r': |
| buf.append('\r'); |
| break; |
| case 'n': |
| buf.append('\n'); |
| break; |
| case 't': |
| buf.append('\t'); |
| break; |
| case 'f': |
| buf.append('\f'); |
| break; |
| case 'a': |
| buf.append('\u0007'); |
| break; |
| case 'e': |
| buf.append('\u001B'); |
| break; |
| case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries |
| buf.append(TextUtilities.getDefaultLineDelimiter(fDocument)); |
| break; |
| /* |
| * \0 for octal is not supported in replace string, since it |
| * would conflict with capturing group \0, etc. |
| */ |
| case '0': |
| buf.append('$').append(ch); |
| /* |
| * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)" |
| * in interpretReplaceEscape(String) above. |
| */ |
| if (i + 1 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| if ('0' <= ch1 && ch1 <= '9') { |
| buf.append('\\'); |
| } |
| } |
| break; |
| |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| buf.append('$').append(ch); |
| break; |
| |
| case 'c': |
| if (i + 1 < length) { |
| char ch1= replaceText.charAt(i + 1); |
| interpretRetainCase(buf, (char)(ch1 ^ 64)); |
| i++; |
| } else { |
| String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalControlEscape", "\\c"); //$NON-NLS-1$ //$NON-NLS-2$ |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'x': |
| if (i + 2 < length) { |
| int parsedInt; |
| try { |
| parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 3), 16); |
| if (parsedInt < 0) |
| throw new NumberFormatException(); |
| } catch (NumberFormatException e) { |
| String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, i + 3)); //$NON-NLS-1$ |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| interpretRetainCase(buf, (char) parsedInt); |
| i+= 2; |
| } else { |
| String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$ |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'u': |
| if (i + 4 < length) { |
| int parsedInt; |
| try { |
| parsedInt= Integer.parseInt(replaceText.substring(i + 1, i + 5), 16); |
| if (parsedInt < 0) |
| throw new NumberFormatException(); |
| } catch (NumberFormatException e) { |
| String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, i + 5)); //$NON-NLS-1$ |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| interpretRetainCase(buf, (char) parsedInt); |
| i+= 4; |
| } else { |
| String msg= TextMessages.getFormattedString("FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$ |
| throw new PatternSyntaxException(msg, replaceText, i); |
| } |
| break; |
| |
| case 'C': |
| if(foundText.toUpperCase().equals(foundText)) // is whole match upper-case? |
| fRetainCaseMode= RC_UPPER; |
| else if (foundText.toLowerCase().equals(foundText)) // is whole match lower-case? |
| fRetainCaseMode= RC_LOWER; |
| else if(Character.isUpperCase(foundText.charAt(0))) // is first character upper-case? |
| fRetainCaseMode= RC_FIRSTUPPER; |
| else |
| fRetainCaseMode= RC_MIXED; |
| break; |
| |
| default: |
| // unknown escape k: append uninterpreted \k |
| buf.append('\\').append(ch); |
| break; |
| } |
| return i; |
| } |
| |
| /** |
| * Converts a non-regex string to a pattern |
| * that can be used with the regex search engine. |
| * |
| * @param string the non-regex pattern |
| * @return the string converted to a regex pattern |
| */ |
| private String asRegPattern(String string) { |
| StringBuffer out= new StringBuffer(string.length()); |
| boolean quoting= false; |
| |
| for (int i= 0, length= string.length(); i < length; i++) { |
| char ch= string.charAt(i); |
| if (ch == '\\') { |
| if (quoting) { |
| out.append("\\E"); //$NON-NLS-1$ |
| quoting= false; |
| } |
| out.append("\\\\"); //$NON-NLS-1$ |
| continue; |
| } |
| if (!quoting) { |
| out.append("\\Q"); //$NON-NLS-1$ |
| quoting= true; |
| } |
| out.append(ch); |
| } |
| if (quoting) |
| out.append("\\E"); //$NON-NLS-1$ |
| |
| return out.toString(); |
| } |
| |
| /** |
| * Substitutes the previous match with the given text. |
| * Sends a <code>DocumentEvent</code> to all registered <code>IDocumentListener</code>. |
| * |
| * @param text the substitution text |
| * @param regExReplace if <code>true</code> <code>text</code> represents a regular expression |
| * @return the replace region or <code>null</code> if there was no match |
| * @throws BadLocationException if startOffset is an invalid document offset |
| * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation |
| * @throws PatternSyntaxException if a regular expression has invalid syntax |
| * |
| * @see DocumentEvent |
| * @see IDocumentListener |
| */ |
| public IRegion replace(String text, boolean regExReplace) throws BadLocationException { |
| return findReplace(REPLACE, -1, null, text, false, false, false, regExReplace); |
| } |
| |
| // ---------- CharSequence implementation ---------- |
| |
| /* |
| * @see java.lang.CharSequence#length() |
| */ |
| public int length() { |
| return fDocument.getLength(); |
| } |
| |
| /* |
| * @see java.lang.CharSequence#charAt(int) |
| */ |
| public char charAt(int index) { |
| try { |
| return fDocument.getChar(index); |
| } catch (BadLocationException e) { |
| throw new IndexOutOfBoundsException(); |
| } |
| } |
| |
| /* |
| * @see java.lang.CharSequence#subSequence(int, int) |
| */ |
| public CharSequence subSequence(int start, int end) { |
| try { |
| return fDocument.get(start, end - start); |
| } catch (BadLocationException e) { |
| throw new IndexOutOfBoundsException(); |
| } |
| } |
| |
| /* |
| * @see java.lang.Object#toString() |
| */ |
| public String toString() { |
| return fDocument.get(); |
| } |
| } |