blob: eace57aec89d17ed2ab1723481ce4412f1c61902 [file] [log] [blame]
/**********************************************************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
* program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Schank (Empolis Information Management GmbH) - initial implementation
**********************************************************************************************************************/
package org.eclipse.smila.processing.pipelets;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Locale;
import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.Pipelet;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.util.ProcessingConstants;
import org.eclipse.smila.processing.util.ResultCollector;
import org.eclipse.smila.utils.codec.EncodingTools;
/**
 * Pipelet that converts between record attachments and encoded string entries stored in a metadata attribute.
 *
 * In encode mode every attachment of a record is encoded and added to the configured attribute as a map with the
 * keys {@link #KEY_NAME} and {@link #KEY_CONTENT}. In decode mode every such map found in the attribute is decoded
 * and stored as an attachment. Currently only BASE64 is supported as codec.
 */
public class CodecPipelet implements Pipelet {
  /** key for the name of the attachment. */
  public static final String KEY_NAME = "name";

  /** key for the content of the attachment. */
  public static final String KEY_CONTENT = "content";

  /**
   * Name of the parameter that selects the attribute in which the encoded attachments are stored.
   */
  public static final String PROP_ATTRIBUTE_NAME = "encodedContentAttribute";

  /** The charset used to convert the encoded string content to bytes before decoding. */
  public static final Charset CHARSET = Charset.forName("UTF-8");

  /**
   * Name of the codec. Currently only BASE64 is supported. (Which is also the default value.)
   */
  public static final String PROP_CODEC_NAME = "codec";

  /**
   * If set to "true", the Pipelet encodes attachments to encoded strings in the attribute. If set to "false", the
   * Pipelet decodes strings from the attribute to attachments. If the parameter is not set, the Pipelet decodes
   * (default: "false").
   */
  public static final String PROP_ENCODE = "encode";

  /**
   * If set to 'true', the Pipelet removes the input of the operation (either attachment or attribute). If set to
   * 'false' (default), the content remains in the processed record for further processing. Default: 'false'.
   */
  public static final String PROP_REMOVE_INPUT = "removeInput";

  /**
   * Default attribute name for the attribute containing the attachments.
   */
  public static final String DEFAULT_ATTRIBUTE_NAME = "_attachments";

  /* Encoding keys, must be uppercase. */

  /**
   * BASE64 encoding.
   */
  public static final String ENCODING_BASE64 = "BASE64";

  /**
   * Default encoding.
   */
  public static final String DEFAULT_ENCODING = ENCODING_BASE64;

  /**
   * Supported encodings (upper case). Configured codec names are upper-cased before they are looked up here.
   */
  public static final Collection<String> SUPPORTED_ENCODINGS = Collections.singletonList(ENCODING_BASE64
    .toUpperCase(Locale.ENGLISH));

  /** The pipelet configuration. */
  private AnyMap _config;

  /** The pipelet's logger. */
  private final Log _log = LogFactory.getLog(getClass());

  /**
   * Stores the configuration; it is used later to resolve the parameters of each processed record.
   *
   * {@inheritDoc}
   */
  @Override
  public void configure(final AnyMap configuration) throws ProcessingException {
    _config = configuration;
  }

  /**
   * Encodes the attachments of each record into an attribute, or decodes the entries of that attribute into
   * attachments, depending on the "encode" parameter.
   *
   * The attribute contains a sequence of maps with keys for the "name" of the attachment and the encoded "content".
   * The default name of the attribute is "_attachments".
   *
   * When decoding, the attachments' names will be equal to the "name" value of the attachment entry in the sequence.
   * When encoding, the "name" value of each entry is the name of the encoded attachment.
   *
   * Example record:
   *
   * <pre>
   * {
   *   "_attachments" : [{
   *       "name" : "att1.txt",
   *       "content" : "VGhpcyBpcyB0aGUgZGVjb2RlZCBjb250ZW50Lg=="
   *     }, {
   *       "name" : "att2.pdf",
   *       "content" : "VGhpcyBpcyB0aGUgZGVjb2RlZCBjb250ZW50Lg=="
   *     }
   *   ]
   * }
   * </pre>
   *
   * Parameters read per record: {@link #PROP_ATTRIBUTE_NAME}, {@link #PROP_CODEC_NAME}, {@link #PROP_REMOVE_INPUT}
   * and {@link #PROP_ENCODE}. Any exception raised while handling a record is handed to the result collector as a
   * failed result for that record.
   *
   * @param blackboard
   *          the blackboard holding the records
   * @param recordIds
   *          the ids of the records to process, may be null (then nothing is processed)
   * @return the result ids as reported by the result collector
   * @throws ProcessingException
   *           declared by the pipelet contract; presumably raised by the result collector when a failed record must
   *           not be dropped - confirm against ResultCollector
   */
  @Override
  public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException {
    final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _config);
    final ResultCollector resultCollector =
      new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT);
    if (recordIds != null) {
      for (final String id : recordIds) {
        try {
          paramAccessor.setCurrentRecord(id);
          final String attributeName = paramAccessor.getParameter(PROP_ATTRIBUTE_NAME, DEFAULT_ATTRIBUTE_NAME);
          final String encodingName = paramAccessor.getParameter(PROP_CODEC_NAME, DEFAULT_ENCODING);
          final boolean removeInput = paramAccessor.getBooleanParameter(PROP_REMOVE_INPUT, false);
          final boolean encode = paramAccessor.getBooleanParameter(PROP_ENCODE, false);
          // reject unknown codecs before touching the record
          if (!SUPPORTED_ENCODINGS.contains(encodingName.toUpperCase(Locale.ENGLISH))) {
            throw new ProcessingException("Encoding " + encodingName + " is not supported.");
          }
          if (encode) {
            // encode the attachments into the configured attribute
            final Record record = blackboard.getRecord(id);
            // copy the names first so attachments can safely be removed while looping over them
            final Collection<String> attachmentNames = new ArrayList<>();
            final Iterator<String> attachmentNamesIter = record.getAttachmentNames();
            while (attachmentNamesIter.hasNext()) {
              attachmentNames.add(attachmentNamesIter.next());
            }
            for (final String attachmentName : attachmentNames) {
              final AnyMap attachmentEntry = encodeAttachment(blackboard, id, encodingName, attachmentName);
              record.getMetadata().add(attributeName, attachmentEntry);
              // remove attachment if configured
              if (removeInput) {
                blackboard.removeAttachment(id, attachmentName);
              }
            }
          } else {
            // decode the encoded content; records without the attribute are passed through unchanged
            final AnyMap metadata = blackboard.getMetadata(id);
            if (metadata.containsKey(attributeName)) {
              for (final Any attachmentEntry : metadata.get(attributeName)) {
                decodeEntry(blackboard, id, encodingName, attachmentEntry.asMap());
              }
              // remove input attribute if configured
              if (removeInput) {
                metadata.remove(attributeName);
              }
            }
          }
          blackboard.commitRecord(id);
          resultCollector.addResult(id);
        } catch (final Exception ex) {
          // a failure only affects the current record; the collector decides how to report it
          resultCollector.addFailedResult(id, ex);
        }
      }
    }
    return resultCollector.getResultIds();
  }

  /**
   * Encodes an attachment to an encoded entry in an attribute.
   *
   * The attachment stream obtained from the blackboard is closed before this method returns, also if encoding
   * fails.
   *
   * @param blackboard
   *          the blackboard
   * @param id
   *          the id of the record
   * @param encodingName
   *          the name of the encoding to use; names other than BASE64 currently fall back to BASE64
   * @param attachmentName
   *          the name of the attachment
   * @return the entry containing the name and encoded content of the attachment
   * @throws BlackboardAccessException
   *           if the attachment cannot be read from the blackboard
   * @throws IOException
   *           if reading, encoding or closing the attachment stream fails
   */
  protected AnyMap encodeAttachment(final Blackboard blackboard, final String id, final String encodingName,
    final String attachmentName) throws BlackboardAccessException, IOException {
    final String encodedContent;
    // try-with-resources: the attachment stream must not stay open after the content was encoded
    try (InputStream attachmentStream = blackboard.getAttachmentAsStream(id, attachmentName)) {
      // select the encoder for the codec; BASE64 is the only one and also the fallback
      switch (encodingName.toUpperCase(Locale.ENGLISH)) {
        case ENCODING_BASE64:
        default:
          encodedContent = EncodingTools.toBase64(attachmentStream);
          break;
      }
    }
    final AnyMap attachmentEntry = DataFactory.DEFAULT.createAnyMap();
    attachmentEntry.put(KEY_NAME, attachmentName);
    attachmentEntry.put(KEY_CONTENT, encodedContent);
    return attachmentEntry;
  }

  /**
   * Decodes an entry and stores the decoded content as an attachment named after the entry's "name" value.
   *
   * @param blackboard
   *          The blackboard
   * @param id
   *          the record's id
   * @param encodingName
   *          the name of the encoding to use; names other than BASE64 currently fall back to BASE64
   * @param entry
   *          the entry containing the name and encoded content
   * @throws BlackboardAccessException
   *           if the attachment cannot be stored on the blackboard
   * @throws IllegalArgumentException
   *           if the entry has no "content" value
   */
  protected void decodeEntry(final Blackboard blackboard, final String id, final String encodingName,
    final AnyMap entry) throws BlackboardAccessException {
    final String attachmentName = entry.getStringValue(KEY_NAME);
    final String encodedContent = entry.getStringValue(KEY_CONTENT);
    if (encodedContent == null) {
      // fail with a meaningful message instead of a bare NullPointerException on getBytes()
      throw new IllegalArgumentException("Entry '" + attachmentName + "' has no '" + KEY_CONTENT
        + "' value to decode.");
    }
    // purely in-memory source, so no system resource is held by these streams
    final InputStream sourceStream = new ByteArrayInputStream(encodedContent.getBytes(CHARSET));
    final InputStream attachmentStream;
    // select the decoding stream for the codec; BASE64 is the only one and also the fallback
    switch (encodingName.toUpperCase(Locale.ENGLISH)) {
      case ENCODING_BASE64:
      default:
        // second argument "false" = do not encode, i.e. decode while reading
        attachmentStream = new Base64InputStream(sourceStream, false);
        break;
    }
    blackboard.setAttachmentFromStream(id, attachmentName, attachmentStream);
  }
}