// blob: f72cdba0c139a1cb872ca8618887bc5e34194191 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
* accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
* distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Tobias Liefke - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.processing.pipelets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.processing.Pipelet;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.MissingParameterException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.util.ProcessingConstants;
import org.eclipse.smila.processing.util.ResultCollector;
/**
* Applies one or more patterns to the literal value in an attribute and substitutes the found occurrences by the
* configured replacements.
*
* @author Tobias Liefke
*/
public class ReplacePipelet implements Pipelet {
  /**
   * Name of the property that contains the input attribute name.
   */
  public static final String PROPERTY_INPUT_ATTRIBUTE = "inputAttribute";

  /**
   * Name of the property that contains the output attribute name. If it is not set, the result is written back to the
   * input attribute.
   */
  public static final String PROPERTY_OUTPUT_ATTRIBUTE = "outputAttribute";

  /**
   * Name of the property that contains the type of the pattern (one of the {@link Type} names).
   */
  public static final String PROPERTY_TYPE = "type";

  /**
   * Name of the property that contains the mapping from pattern to replacement.
   */
  public static final String PROPERTY_MAPPING = "mapping";

  /**
   * Name of the property that contains the pattern.
   */
  public static final String PROPERTY_PATTERN = "pattern";

  /**
   * Name of the property that contains the substitution.
   */
  public static final String PROPERTY_REPLACEMENT = "replacement";

  /**
   * Name of the property that indicates to ignore case.
   */
  public static final String PROPERTY_IGNORE_CASE = "ignoreCase";

  /**
   * The type of the replacement. The constant names are parsed from the configuration with
   * {@link Type#valueOf(String)}, so they must not be renamed.
   */
  public enum Type {
    /** The pattern matches the whole string. */
    entity,
    /** The pattern matches a part of the string. */
    substring,
    /** The pattern is a regular expression. */
    regexp
  }

  /**
   * The local logger.
   */
  private final Log _log = LogFactory.getLog(getClass());

  /**
   * The pipelet configuration.
   */
  private AnyMap _configuration;

  /**
   * The configured mapping, created by {@link #configure(AnyMap)}.
   */
  private Mapping _mapping;

  /**
   * Reads the replacement type, the ignore-case flag and all pattern/replacement pairs from the configuration.
   * <p>
   * The pairs are taken from {@link #PROPERTY_MAPPING} if present: either a map (pattern as key, replacement as value)
   * or a sequence of maps, each with {@link #PROPERTY_PATTERN} and an optional {@link #PROPERTY_REPLACEMENT}. Without
   * {@link #PROPERTY_MAPPING} a single pair is read from {@link #PROPERTY_PATTERN} and {@link #PROPERTY_REPLACEMENT}
   * of the configuration itself.
   * <p>
   * An unknown type name, an empty substring pattern or an invalid regular expression surfaces as an unchecked
   * exception thrown by the respective JDK or mapping call.
   *
   * @param configuration
   *          the pipelet configuration
   * @throws ProcessingException
   *           if a required parameter is missing
   */
  @Override
  public void configure(final AnyMap configuration) throws ProcessingException {
    _configuration = configuration;
    final ParameterAccessor paramAccessor = new ParameterAccessor(null, configuration);
    final boolean ignoreCase = paramAccessor.getBooleanParameter(PROPERTY_IGNORE_CASE, Boolean.FALSE);
    switch (Type.valueOf(paramAccessor.getParameter(PROPERTY_TYPE, Type.substring.toString()))) {
      case entity:
        _mapping = new EntityMapping(ignoreCase);
        break;
      case substring:
        _mapping = new SubstringMapping(ignoreCase);
        break;
      case regexp:
        _mapping = new RegexpMapping(ignoreCase);
        break;
      default:
        // Can't happen
    }
    final Any mapping = configuration.get(PROPERTY_MAPPING);
    if (mapping != null) {
      if (mapping.isMap()) {
        // A map with the pattern as key and the replacement as value
        for (Map.Entry<String, Any> entry : mapping.asMap().entrySet()) {
          _mapping.addMapping(entry.getKey(), entry.getValue().asValue().asString());
        }
      } else {
        // A sequence in which every element is a map containing the pattern(s) and the replacement
        for (Any entry : mapping.asSeq()) {
          final AnyMap map = entry.asMap();
          readPattern(map);
        }
      }
    } else {
      // Assume that it is only a single pattern
      _mapping.addMapping(paramAccessor.getRequiredParameter(PROPERTY_PATTERN),
        paramAccessor.getParameter(PROPERTY_REPLACEMENT, ""));
    }
  }

  /**
   * Reads the properties {@link #PROPERTY_PATTERN} and {@link #PROPERTY_REPLACEMENT} from the given map and adds them
   * to the current mapping. The pattern may be a single value or a sequence of values that all share the same
   * replacement; a missing replacement is treated as the empty string.
   *
   * @param map
   *          the configuration map
   * @throws MissingParameterException
   *           if the {@link #PROPERTY_PATTERN} parameter is missing
   */
  private void readPattern(AnyMap map) throws MissingParameterException {
    final Any pattern = map.get(PROPERTY_PATTERN);
    if (pattern == null) {
      throw new MissingParameterException("Missing required parameter " + PROPERTY_PATTERN);
    }
    String replacement = map.getStringValue(PROPERTY_REPLACEMENT);
    if (replacement == null) {
      replacement = "";
    }
    if (pattern.isSeq()) {
      // More than one pattern for the replacement
      for (String key : pattern.asSeq().asStrings()) {
        _mapping.addMapping(key, replacement);
      }
    } else {
      // Exactly one pattern for the replacement
      _mapping.addMapping(pattern.asValue().asString(), replacement);
    }
  }

  /**
   * Applies the configured mapping to the input attribute of every given record. Records without a string value in
   * the input attribute are left untouched but still reported as results. Any exception raised for a record is handed
   * to the result collector together with the record id.
   *
   * @param blackboard
   *          the blackboard that provides the record metadata
   * @param recordIds
   *          the ids of the records to process, may be <code>null</code>
   * @return the ids collected by the result collector
   * @throws ProcessingException
   *           declared by the pipelet contract and possibly raised by the result collector
   */
  @Override
  public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException {
    final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _configuration);
    final ResultCollector resultCollector =
      new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT);
    if (recordIds != null) {
      for (final String id : recordIds) {
        try {
          paramAccessor.setCurrentRecord(id);
          final String inputAttribute = paramAccessor.getRequiredParameter(PROPERTY_INPUT_ATTRIBUTE);
          final AnyMap metadata = blackboard.getMetadata(id);
          final String value = metadata.getStringValue(inputAttribute);
          if (value != null) {
            final String outputAttribute = paramAccessor.getParameter(PROPERTY_OUTPUT_ATTRIBUTE, inputAttribute);
            metadata.put(outputAttribute, _mapping.apply(value));
          }
          resultCollector.addResult(id);
        } catch (final Exception e) {
          // Boundary of the per-record processing: report the failure instead of aborting the whole batch
          resultCollector.addFailedResult(id, e);
        }
      }
    }
    return resultCollector.getResultIds();
  }

  /** Base class for the mapping types. */
  private abstract static class Mapping {
    /** Indicates that this mapping should ignore the case of characters. */
    protected final boolean _ignoreCase;

    /**
     * Creates a new instance of Mapping.
     *
     * @param ignoreCase
     *          indicates that the mapping should ignore the case of characters.
     */
    public Mapping(boolean ignoreCase) {
      _ignoreCase = ignoreCase;
    }

    /**
     * Add a single mapping from the configuration.
     *
     * @param pattern
     *          the pattern to add
     * @param replacement
     *          the substitution to use if the pattern matches
     */
    public abstract void addMapping(String pattern, String replacement);

    /**
     * Execute this mapping for the given literal.
     *
     * @param value
     *          the literal
     * @return the result with the replacements (the value - if nothing matches)
     */
    public abstract String apply(String value);
  }

  /** @see Type#entity */
  private static final class EntityMapping extends Mapping {
    /** Contains the entities: (normalized) pattern to replacement. */
    private final Map<String, String> _patterns = new HashMap<String, String>();

    /**
     * Creates a new instance of EntityMapping.
     *
     * @param ignoreCase
     *          indicates that the mapping should ignore the case of characters.
     */
    public EntityMapping(boolean ignoreCase) {
      super(ignoreCase);
    }

    @Override
    public void addMapping(String pattern, String replacement) {
      _patterns.put(normalize(pattern), replacement);
    }

    @Override
    public String apply(String value) {
      final String result = _patterns.get(normalize(value));
      return result == null ? value : result;
    }

    /**
     * Creates the lookup key for a pattern or a value.
     *
     * @param text
     *          the pattern or value
     * @return the text itself, or its lower case form if the case is ignored
     */
    private String normalize(final String text) {
      // Locale.ROOT keeps the lookup independent of the default locale (e.g. the Turkish dotless i)
      return _ignoreCase ? text.toLowerCase(Locale.ROOT) : text;
    }
  }

  /** @see Type#substring */
  private static final class SubstringMapping extends Mapping {
    /** The strings to search for. */
    private final List<String> _patterns = new ArrayList<String>();

    /** The substitutions for the respective pattern. */
    private final List<String> _replacements = new ArrayList<String>();

    /**
     * Creates a new instance of SubstringMapping.
     *
     * @param ignoreCase
     *          indicates that the mapping should ignore the case of characters.
     */
    public SubstringMapping(boolean ignoreCase) {
      super(ignoreCase);
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException
     *           if the pattern is empty
     */
    @Override
    public void addMapping(String pattern, String replacement) {
      if (pattern.length() == 0) {
        // Suppress endless loops
        throw new IllegalArgumentException("Missing pattern for replacement: " + replacement);
      }
      // The pattern is stored unchanged, the case is handled while searching
      _patterns.add(pattern);
      _replacements.add(replacement);
    }

    /**
     * Replaces all occurrences of all patterns, one pattern after the other in the order of their configuration. A
     * later pattern also sees the replacements inserted for an earlier one.
     *
     * @param value
     *          the literal
     * @return the result with the replacements (the value - if nothing matches)
     */
    @Override
    public String apply(String value) {
      // Assume that in most cases we don't need to replace and use a lazy string builder
      StringBuilder result = null;
      for (int i = 0; i < _patterns.size(); i++) {
        final String key = _patterns.get(i);
        final String replacement = _replacements.get(i);
        int index = indexOf(result == null ? (CharSequence) value : result, key, 0);
        while (index >= 0) {
          if (result == null) {
            result = new StringBuilder(value.length() - key.length() + replacement.length());
            result.append(value, 0, index).append(replacement).append(value, index + key.length(), value.length());
          } else {
            result.replace(index, index + key.length(), replacement);
          }
          // Continue behind the inserted replacement, so that it is never searched again for the same pattern
          index = indexOf(result, key, index + replacement.length());
        }
      }
      return result == null ? value : result.toString();
    }

    /**
     * Finds the next occurrence of the key, honoring the ignore case setting.
     *
     * @param text
     *          the text to search in
     * @param key
     *          the (non-empty) string to search for
     * @param fromIndex
     *          the index of the first character to inspect
     * @return the index of the first occurrence at or behind fromIndex, -1 if there is none
     */
    private int indexOf(final CharSequence text, final String key, final int fromIndex) {
      if (!_ignoreCase) {
        // CharSequence has no indexOf, but both concrete types used here have one
        if (text instanceof StringBuilder) {
          return ((StringBuilder) text).indexOf(key, fromIndex);
        }
        return text.toString().indexOf(key, fromIndex);
      }
      final int lastStart = text.length() - key.length();
      for (int start = fromIndex; start <= lastStart; start++) {
        if (regionMatchesIgnoreCase(text, start, key)) {
          return start;
        }
      }
      return -1;
    }

    /**
     * Compares the key with the region of the text that starts at the offset, character by character and without
     * regard to case. The caller guarantees that the region lies completely within the text.
     *
     * @param text
     *          the text that contains the region
     * @param offset
     *          the start of the region
     * @param key
     *          the string to compare with
     * @return <code>true</code> if the region equals the key, ignoring the case
     */
    private static boolean regionMatchesIgnoreCase(final CharSequence text, final int offset, final String key) {
      for (int i = 0; i < key.length(); i++) {
        final char textChar = text.charAt(offset + i);
        final char keyChar = key.charAt(i);
        if (textChar != keyChar) {
          // Same two-step comparison as String.regionMatches(true, ...): upper case first, then lower case
          final char upperText = Character.toUpperCase(textChar);
          final char upperKey = Character.toUpperCase(keyChar);
          if (upperText != upperKey && Character.toLowerCase(upperText) != Character.toLowerCase(upperKey)) {
            return false;
          }
        }
      }
      return true;
    }
  }

  /** @see Type#regexp */
  private static final class RegexpMapping extends Mapping {
    /** The regular expressions for matching. */
    private final List<Pattern> _patterns = new ArrayList<Pattern>();

    /** The replacements for the respective pattern. */
    private final List<String> _replacements = new ArrayList<String>();

    /**
     * Creates a new instance of RegexpMapping.
     *
     * @param ignoreCase
     *          indicates that the mapping should ignore the case of characters.
     */
    public RegexpMapping(boolean ignoreCase) {
      super(ignoreCase);
    }

    /**
     * Compiles the regular expression once and stores it with its replacement.
     *
     * @param regex
     *          the regular expression
     * @param replacement
     *          the replacement, in the syntax of {@link java.util.regex.Matcher#replaceAll(String)}
     * @throws java.util.regex.PatternSyntaxException
     *           if the regular expression is invalid
     */
    @Override
    public void addMapping(String regex, String replacement) {
      // UNICODE_CASE extends the case-insensitive matching beyond US-ASCII, like in the other mapping types
      final int flags = _ignoreCase ? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE : 0;
      _patterns.add(Pattern.compile(regex, flags));
      _replacements.add(replacement);
    }

    /**
     * Applies all regular expressions, one after the other in the order of their configuration.
     *
     * @param value
     *          the literal
     * @return the result with the replacements (the value - if nothing matches)
     */
    @Override
    public String apply(String value) {
      String result = value;
      for (int i = 0; i < _patterns.size(); i++) {
        result = _patterns.get(i).matcher(result).replaceAll(_replacements.get(i));
      }
      return result;
    }
  }
}