| /********************************************************************************************************************** |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This |
| * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which |
| * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Schank (Empolis Information Management GmbH) - initial implementation |
| **********************************************************************************************************************/ |
| package org.eclipse.smila.processing.pipelets; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.nio.charset.Charset; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Iterator; |
| import java.util.Locale; |
| |
| import org.apache.commons.codec.binary.Base64InputStream; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.eclipse.smila.blackboard.Blackboard; |
| import org.eclipse.smila.blackboard.BlackboardAccessException; |
| import org.eclipse.smila.datamodel.Any; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.DataFactory; |
| import org.eclipse.smila.datamodel.Record; |
| import org.eclipse.smila.processing.Pipelet; |
| import org.eclipse.smila.processing.ProcessingException; |
| import org.eclipse.smila.processing.parameters.ParameterAccessor; |
| import org.eclipse.smila.processing.util.ProcessingConstants; |
| import org.eclipse.smila.processing.util.ResultCollector; |
| import org.eclipse.smila.utils.codec.EncodingTools; |
| |
| /** |
| * Decodes/encodes encoded attributes into or from attachments. |
| */ |
| public class CodecPipelet implements Pipelet { |
| |
| /** key for the name of the attachment. */ |
| public static final String KEY_NAME = "name"; |
| |
| /** key for the content of the attachment. */ |
| public static final String KEY_CONTENT = "content"; |
| |
| /** |
| * Name of the input attribute where the encoded attachments are stored. |
| */ |
| public static final String PROP_ATTRIBUTE_NAME = "encodedContentAttribute"; |
| |
| /** The string encoding. */ |
| public static final Charset CHARSET = Charset.forName("UTF-8"); |
| |
| /** |
| * Name of the codec. Currently only BASE64 is supported. (Which is also the default value.) |
| */ |
| public static final String PROP_CODEC_NAME = "codec"; |
| |
| /** |
| * If set to "true", the Pipelet encodes attachments to encoded strings in the attribute. If set to "false", the |
| * Piplet decodes strings from the attribute to attachments. This parameter is mandatory. |
| */ |
| public static final String PROP_ENCODE = "encode"; |
| |
| /** |
| * If set to 'true', the Pipelet removes the input of the operation (either attachment or attribute). If set to |
| * 'false' (default), the content remains in the processed record for further processing. Default: 'false'. |
| */ |
| public static final String PROP_REMOVE_INPUT = "removeInput"; |
| |
| /** |
| * Default attribute name for the attribute containing the attachments. |
| */ |
| public static final String DEFAULT_ATTRIBUTE_NAME = "_attachments"; |
| |
| /* Encopding keys, must be uppercase. */ |
| /** |
| * BASE64 encoding. |
| */ |
| public static final String ENCODING_BASE64 = "BASE64"; |
| |
| /** |
| * Default encoding. |
| */ |
| public static final String DEFAULT_ENCODING = ENCODING_BASE64; |
| |
| /** |
| * Supported encodings. |
| */ |
| public static final Collection<String> SUPPORTED_ENCODINGS = Collections.singletonList(ENCODING_BASE64 |
| .toUpperCase(Locale.ENGLISH)); |
| |
| /** The pipelet configuration. */ |
| private AnyMap _config; |
| |
| /** The pipelet's logger. */ |
| private final Log _log = LogFactory.getLog(getClass()); |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public void configure(final AnyMap configuration) throws ProcessingException { |
| _config = configuration; |
| } |
| |
| /** |
| * Decodes encoded strings which are stored in a sequence attribute of the record and stores them as attachments. |
| * |
| * The sttribute contains a sequence of maps with keys for the "name" of the attachment and the encoded "content". The |
| * default name of the attribute is "_attachments". |
| * |
| * The attachments' names will be equal to the "name" value of the attachment entry in the sequence. |
| * |
| * Example record: |
| * |
| * <pre> |
| * { |
| * "_attachments" : [{ |
| * "name" : "att1.txt", |
| * "content" : "VGhpcyBpcyB0aGUgZGVjb2RlZCBjb250ZW50Lg==" |
| * }, { |
| * "name" : "att2.pdf", |
| * "content" : "VGhpcyBpcyB0aGUgZGVjb2RlZCBjb250ZW50Lg==" |
| * } |
| * ] |
| * } |
| * </pre> |
| */ |
| @Override |
| public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException { |
| final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _config); |
| final ResultCollector resultCollector = |
| new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT); |
| if (recordIds != null) { |
| for (final String id : recordIds) { |
| try { |
| paramAccessor.setCurrentRecord(id); |
| final String attributeName = paramAccessor.getParameter(PROP_ATTRIBUTE_NAME, DEFAULT_ATTRIBUTE_NAME); |
| final String encodingName = paramAccessor.getParameter(PROP_CODEC_NAME, DEFAULT_ENCODING); |
| final boolean removeInput = paramAccessor.getBooleanParameter(PROP_REMOVE_INPUT, false); |
| final boolean encode = paramAccessor.getBooleanParameter(PROP_ENCODE, false); |
| |
| if (!SUPPORTED_ENCODINGS.contains(encodingName.toUpperCase(Locale.ENGLISH))) { |
| throw new ProcessingException("Encoding " + encodingName + " is not supported."); |
| } |
| |
| if (encode) { |
| // encode the attachments into the configured attribute |
| final Record record = blackboard.getRecord(id); |
| final Iterator<String> attachmentNamesIter = record.getAttachmentNames(); |
| final Collection<String> attachmentNames = new ArrayList<>(); |
| // copy so we can safely remove attachments during iteration |
| while (attachmentNamesIter.hasNext()) { |
| attachmentNames.add(attachmentNamesIter.next()); |
| } |
| for (final String attachmentName : attachmentNames) { |
| final AnyMap attachmentEntry = encodeAttachment(blackboard, id, encodingName, attachmentName); |
| record.getMetadata().add(attributeName, attachmentEntry); |
| |
| // remove attachment if configured |
| if (removeInput) { |
| blackboard.removeAttachment(id, attachmentName); |
| } |
| } |
| } else { |
| // decode the encoded content |
| final AnyMap metadata = blackboard.getMetadata(id); |
| if (metadata.containsKey(attributeName)) { |
| for (final Any attachmentEntry : metadata.get(attributeName)) { |
| final AnyMap entry = attachmentEntry.asMap(); |
| decodeEntry(blackboard, id, encodingName, entry); |
| } |
| // remove input attribute if configured |
| if (removeInput) { |
| metadata.remove(attributeName); |
| } |
| } |
| } |
| blackboard.commitRecord(id); |
| |
| resultCollector.addResult(id); |
| } catch (final Exception ex) { |
| resultCollector.addFailedResult(id, ex); |
| } |
| } |
| } |
| return resultCollector.getResultIds(); |
| } |
| |
| /** |
| * Encodes an attachment to an encoded entry in an attribute. |
| * |
| * @param blackboard |
| * the blackboard |
| * @param id |
| * the id of the record |
| * @param encodingName |
| * the name of the encoding to use |
| * @param attachmentName |
| * the name of the attachment |
| * @return the entry containing the name and encoded content of the attachment |
| * @throws BlackboardAccessException |
| * @throws IOException |
| */ |
| protected AnyMap encodeAttachment(final Blackboard blackboard, final String id, final String encodingName, |
| final String attachmentName) throws BlackboardAccessException, IOException { |
| final String encodingString; |
| // get the correct stream for the encoding |
| switch (encodingName.toUpperCase(Locale.ENGLISH)) { |
| case ENCODING_BASE64: |
| // use no line length limitation and no line separator |
| encodingString = EncodingTools.toBase64(blackboard.getAttachmentAsStream(id, attachmentName)); |
| break; |
| default: |
| encodingString = EncodingTools.toBase64(blackboard.getAttachmentAsStream(id, attachmentName)); |
| } |
| final AnyMap attachmentEntry = DataFactory.DEFAULT.createAnyMap(); |
| attachmentEntry.put(KEY_NAME, attachmentName); |
| attachmentEntry.put(KEY_CONTENT, encodingString); |
| return attachmentEntry; |
| } |
| |
| /** |
| * Decode an entry. |
| * |
| * @param blackboard |
| * The blackboard |
| * @param id |
| * the record's id |
| * @param encodingName |
| * the name of the encoding to use |
| * @param entry |
| * thh entry containing the name and encoded content |
| * @throws BlackboardAccessException |
| */ |
| protected void decodeEntry(final Blackboard blackboard, final String id, final String encodingName, |
| final AnyMap entry) throws BlackboardAccessException { |
| final String attachmentName = entry.getStringValue(KEY_NAME); |
| final InputStream sourceStream = new ByteArrayInputStream(entry.getStringValue(KEY_CONTENT).getBytes(CHARSET)); |
| InputStream attachmentStream; |
| // get the correct stream for the encoding |
| switch (encodingName.toUpperCase(Locale.ENGLISH)) { |
| case ENCODING_BASE64: |
| attachmentStream = new Base64InputStream(sourceStream, false); |
| break; |
| default: |
| attachmentStream = new Base64InputStream(sourceStream, false); |
| } |
| |
| blackboard.setAttachmentFromStream(id, attachmentName, attachmentStream); |
| } |
| |
| } |