| /******************************************************************************* |
| * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the |
| * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this |
| * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Tobias Liefke - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.smila.processing.pipelets; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.regex.Pattern; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.eclipse.smila.blackboard.Blackboard; |
| import org.eclipse.smila.datamodel.Any; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.processing.Pipelet; |
| import org.eclipse.smila.processing.ProcessingException; |
| import org.eclipse.smila.processing.parameters.MissingParameterException; |
| import org.eclipse.smila.processing.parameters.ParameterAccessor; |
| import org.eclipse.smila.processing.util.ProcessingConstants; |
| import org.eclipse.smila.processing.util.ResultCollector; |
| |
| /** |
| * Applies one or more patterns to the literal value in an attribute and substitutes the found occurrences by the |
| * configured replacements. |
| * |
| * @author Tobias Liefke |
| */ |
| public class ReplacePipelet implements Pipelet { |
| /** |
| * Name of the property that contains the input attribute name. |
| */ |
| public static final String PROPERTY_INPUT_ATTRIBUTE = "inputAttribute"; |
| |
| /** |
| * Name of the property that contains the output attribute name. |
| */ |
| public static final String PROPERTY_OUTPUT_ATTRIBUTE = "outputAttribute"; |
| |
| /** |
| * Name of the property that contains the type of the pattern. |
| */ |
| public static final String PROPERTY_TYPE = "type"; |
| |
| /** |
| * Name of the property that contains the mapping from pattern to replacement. |
| */ |
| public static final String PROPERTY_MAPPING = "mapping"; |
| |
| /** |
| * Name of the property that contains the pattern. |
| */ |
| public static final String PROPERTY_PATTERN = "pattern"; |
| |
| /** |
| * Name of the property that contains the substitution. |
| */ |
| public static final String PROPERTY_REPLACEMENT = "replacement"; |
| |
| /** |
| * Name of the property that indicates to ignore case. |
| */ |
| public static final String PROPERTY_IGNORE_CASE = "ignoreCase"; |
| |
| /** |
| * The type of the replacement. |
| */ |
| public enum Type { |
| /** The pattern matches the whole string. */ |
| entity, |
| /** The pattern matches a part of the string. */ |
| substring, |
| /** The pattern is a regular expression. */ |
| regexp |
| } |
| |
| /** |
| * The local logger. |
| */ |
| private final Log _log = LogFactory.getLog(getClass()); |
| |
| /** |
| * The pipelet configuration. |
| */ |
| private AnyMap _configuration; |
| |
| /** |
| * The configured mapping. |
| */ |
| private Mapping _mapping; |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public void configure(final AnyMap configuration) throws ProcessingException { |
| _configuration = configuration; |
| final ParameterAccessor paramAccessor = new ParameterAccessor(null, configuration); |
| final boolean ignoreCase = paramAccessor.getBooleanParameter(PROPERTY_IGNORE_CASE, Boolean.FALSE); |
| switch (Type.valueOf(paramAccessor.getParameter(PROPERTY_TYPE, Type.substring.toString()))) { |
| case entity: |
| _mapping = new EntityMapping(ignoreCase); |
| break; |
| case substring: |
| _mapping = new SubstringMapping(ignoreCase); |
| break; |
| case regexp: |
| _mapping = new RegexpMapping(ignoreCase); |
| break; |
| default: |
| // Can't happen |
| } |
| final Any mapping = configuration.get(PROPERTY_MAPPING); |
| if (mapping != null) { |
| if (mapping.isMap()) { |
| // A map with the pattern as key and the replacement as value |
| for (Map.Entry<String, Any> entry : mapping.asMap().entrySet()) { |
| _mapping.addMapping(entry.getKey(), entry.getValue().asValue().asString()); |
| } |
| } else { |
| // A sequence with every object as a map containing the |
| for (Any entry : mapping.asSeq()) { |
| final AnyMap map = entry.asMap(); |
| readPattern(map); |
| } |
| } |
| } else { |
| // Assume that it is only a single pattern |
| _mapping.addMapping(paramAccessor.getRequiredParameter(PROPERTY_PATTERN), |
| paramAccessor.getParameter(PROPERTY_REPLACEMENT, "")); |
| } |
| } |
| |
| /** |
| * Reads the properties {@link #PROPERTY_PATTERN} and {@link #PROPERTY_REPLACEMENT} from the given map and add them to |
| * the current mapping. |
| * |
| * @param map |
| * the configuration map |
| * @throws MissingParameterException |
| * if the {@link #PROPERTY_PATTERN} parameter is missing |
| */ |
| private void readPattern(AnyMap map) throws MissingParameterException { |
| final Any pattern = map.get(PROPERTY_PATTERN); |
| if (pattern == null) { |
| throw new MissingParameterException("Missing required parameter " + PROPERTY_PATTERN); |
| } |
| String replacement = map.getStringValue(PROPERTY_REPLACEMENT); |
| if (replacement == null) { |
| replacement = ""; |
| } |
| if (pattern.isSeq()) { |
| // More than one pattern for the replacement |
| for (String key : pattern.asSeq().asStrings()) { |
| _mapping.addMapping(key, replacement); |
| } |
| } else { |
| // Exactly one pattern for the replacement |
| _mapping.addMapping(pattern.asValue().asString(), replacement); |
| } |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException { |
| final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _configuration); |
| final ResultCollector resultCollector = |
| new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT); |
| if (recordIds != null) { |
| for (final String id : recordIds) { |
| try { |
| paramAccessor.setCurrentRecord(id); |
| final String inputAttribute = paramAccessor.getRequiredParameter(PROPERTY_INPUT_ATTRIBUTE); |
| final AnyMap metadata = blackboard.getMetadata(id); |
| final String value = metadata.getStringValue(inputAttribute); |
| if (value != null) { |
| final String outputAttribute = paramAccessor.getParameter(PROPERTY_OUTPUT_ATTRIBUTE, inputAttribute); |
| metadata.put(outputAttribute, _mapping.apply(value)); |
| } |
| resultCollector.addResult(id); |
| } catch (final Exception e) { |
| resultCollector.addFailedResult(id, e); |
| } |
| } |
| } |
| return resultCollector.getResultIds(); |
| } |
| |
| /** Base class for the mapping types. */ |
| private abstract static class Mapping { |
| /** Indicates that this mapping should ignore the case of characters. */ |
| protected final boolean _ignoreCase; |
| |
| /** |
| * Creates a new instance of Mapping. |
| * |
| * @param ignoreCase |
| * indicates that the mapping should ignore the case of characters. |
| */ |
| public Mapping(boolean ignoreCase) { |
| _ignoreCase = ignoreCase; |
| } |
| |
| /** |
| * Add a single mapping from the configuration. |
| * |
| * @param pattern |
| * the pattern to add |
| * @param replacement |
| * the substitution to use if the pattern matches |
| */ |
| public abstract void addMapping(String pattern, String replacement); |
| |
| /** |
| * Execute this mapping for the given literal. |
| * |
| * @param value |
| * the literal |
| * @return the result with the replacements (the value - if nothing matches) |
| */ |
| public abstract String apply(String value); |
| } |
| |
| /** @see Type#entity */ |
| private final class EntityMapping extends Mapping { |
| /** Contains the entities. */ |
| private final Map<String, String> _patterns = new HashMap<String, String>(); |
| |
| /** |
| * Creates a new instance of EntityMapping. |
| * |
| * @param ignoreCase |
| * indicates that the mapping should ignore the case of characters. |
| */ |
| public EntityMapping(boolean ignoreCase) { |
| super(ignoreCase); |
| } |
| |
| @Override |
| public void addMapping(String pattern, String replacement) { |
| _patterns.put(_ignoreCase ? pattern.toLowerCase() : pattern, replacement); |
| } |
| |
| @Override |
| public String apply(String value) { |
| final String result = _patterns.get(_ignoreCase ? value.toLowerCase() : value); |
| return result == null ? value : result; |
| } |
| } |
| |
| /** @see Type#substring */ |
| private final class SubstringMapping extends Mapping { |
| /** The strings to search for. */ |
| private final List<String> _patterns = new ArrayList<String>(); |
| |
| /** The substitutions for the respective pattern. */ |
| private final List<String> _replacements = new ArrayList<String>(); |
| |
| /** |
| * Creates a new instance of SubstringMapping. |
| * |
| * @param ignoreCase |
| * indicates that the mapping should ignore the case of characters. |
| */ |
| public SubstringMapping(boolean ignoreCase) { |
| super(ignoreCase); |
| } |
| |
| @Override |
| public void addMapping(String pattern, String replacement) { |
| if (pattern.length() == 0) { |
| // Suppress endless loops |
| throw new IllegalArgumentException("Missing pattern for replacement: " + replacement); |
| } |
| _patterns.add(_ignoreCase ? pattern.toLowerCase() : pattern); |
| _replacements.add(replacement); |
| } |
| |
| @Override |
| public String apply(String value) { |
| // Assume that in most cases we don't need to replace and use a lazy string builder |
| StringBuilder result = null; |
| for (int i = 0; i < _patterns.size(); i++) { |
| final String key = _patterns.get(i); |
| int index = result == null ? value.indexOf(key) : result.indexOf(key); |
| while (index >= 0) { |
| final String replacement = _replacements.get(i); |
| if (result == null) { |
| result = new StringBuilder(value.length() - key.length() + replacement.length()); |
| result.append(value, 0, index).append(replacement).append(value, index + key.length(), value.length()); |
| } else { |
| result.replace(index, index + key.length(), replacement); |
| } |
| index = result.indexOf(key, index + replacement.length()); |
| } |
| } |
| return result == null ? value : result.toString(); |
| } |
| } |
| |
| /** @see Type#regexp */ |
| private final class RegexpMapping extends Mapping { |
| /** The regular expressions for matching. */ |
| private List<Pattern> _patterns = new ArrayList<Pattern>(); |
| |
| /** The replacements for the respective pattern. */ |
| private List<String> _replacements = new ArrayList<String>(); |
| |
| /** |
| * Creates a new instance of RegexpMapping. |
| * |
| * @param ignoreCase |
| * indicates that the mapping should ignore the case of characters. |
| */ |
| public RegexpMapping(boolean ignoreCase) { |
| super(ignoreCase); |
| } |
| |
| @Override |
| public void addMapping(String regex, String replacement) { |
| _patterns.add(Pattern.compile(regex, _ignoreCase ? Pattern.CASE_INSENSITIVE : 0)); |
| _replacements.add(replacement); |
| } |
| |
| @Override |
| public String apply(String value) { |
| for (int i = 0; i < _patterns.size(); i++) { |
| value = _patterns.get(i).matcher(value).replaceAll(_replacements.get(i)); |
| } |
| return value; |
| } |
| } |
| |
| } |