| /*********************************************************************************************************************** |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This |
| * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which |
| * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Juergen Schumacher (Empolis Information Management GmbH) - initial implementation, based on |
| * XmlSplitterPipelet |
| **********************************************************************************************************************/ |
| |
| package org.eclipse.smila.processing.pipelets.xmlprocessing; |
| |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.net.URL; |
| |
| import javax.xml.namespace.QName; |
| |
| import org.apache.commons.httpclient.HttpClient; |
| import org.apache.commons.httpclient.methods.GetMethod; |
| import org.eclipse.smila.blackboard.Blackboard; |
| import org.eclipse.smila.blackboard.Blackboard.Get; |
| import org.eclipse.smila.blackboard.BlackboardAccessException; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.DataFactory; |
| import org.eclipse.smila.datamodel.Record; |
| import org.eclipse.smila.processing.ProcessingException; |
| import org.eclipse.smila.processing.parameters.MissingParameterException; |
| import org.eclipse.smila.processing.parameters.ParameterAccessor; |
| import org.eclipse.smila.processing.pipelets.ATransformationPipelet; |
| import org.eclipse.smila.processing.pipelets.DocumentSplitterPipelet; |
| import org.eclipse.smila.processing.util.ProcessingConstants; |
| import org.eclipse.smila.processing.util.ResultCollector; |
| import org.eclipse.smila.utils.xml.stax.XmlSnippetHandler; |
| import org.eclipse.smila.utils.xml.stax.XmlSnippetSplitter; |
| |
| /** |
| * The possible properties are: |
| * <ul> |
| * <li>beginTagName: the name of the tag to start the xml snippet</li> |
| * <li>beginTagNamespace: the (optional) namespace of the tag to start the xml snippet</li> |
| * <li>endTagName: the name of the tag to end the xml snippet</li> |
| * <li>endTagNamespace: the (optional) namespace of the tag to end the xml snippet</li> |
| * <li>inputName: name of the Attribute/Attachment to read the XML Document from.</li> |
| * <li>outputName: name of the Attribute/Attachment to store the extracted value in</li> |
| * <li>inputType: the type (Attribute or Attachment of the inputName). An input Attribute is not interpreted as content |
| * but as a file path or an URL to the XML document</li> |
| * <li>outputType: the type (Attribute or Attachment of the outputName)</li> |
| * </ul> |
| */ |
| public class XmlDocumentSplitterPipelet extends ATransformationPipelet { |
| |
| /** Constant for the property beginTagName. */ |
| public static final String PROP_BEGIN_TAG_NAME = "beginTagName"; |
| |
| /** Constant for the property beginTagNamespace. */ |
| public static final String PROP_BEGIN_TAG_NAMESPACE = "beginTagNamespace"; |
| |
| /** Constant for the property endTagName. */ |
| public static final String PROP_END_TAG_NAME = "endTagName"; |
| |
| /** Constant for the property endTagNamespace. */ |
| public static final String PROP_END_TAG_NAMESPACE = "endTagNamespace"; |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException { |
| if (recordIds == null) { |
| return recordIds; |
| } |
| final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _config); |
| final ResultCollector resultCollector = |
| new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT); |
| for (final String id : recordIds) { |
| try { |
| paramAccessor.setCurrentRecord(id); |
| final String beginTagName = paramAccessor.getRequiredParameter(PROP_BEGIN_TAG_NAME); |
| final String beginTagNamespace = paramAccessor.getParameter(PROP_BEGIN_TAG_NAMESPACE, ""); |
| final String endTagName = paramAccessor.getParameter(PROP_END_TAG_NAME, beginTagName); |
| final String endTagNamespace = paramAccessor.getParameter(PROP_END_TAG_NAMESPACE, beginTagNamespace); |
| final QName beginTag = new QName(beginTagNamespace, beginTagName); |
| final QName endTag = new QName(endTagNamespace, endTagName); |
| final String outputName = getOutputName(paramAccessor); |
| final boolean storeInAttribute = isStoreInAttribute(getOutputType(paramAccessor)); |
| final InternalHandler snippetHandler = |
| new InternalHandler(blackboard, id, outputName, storeInAttribute, resultCollector); |
| final XmlSnippetSplitter splitter = new XmlSnippetSplitter(snippetHandler, beginTag, endTag); |
| final InputStream inputStream = getXmlInputStream(blackboard, id, paramAccessor); |
| splitter.read(inputStream); |
| if (_log.isInfoEnabled()) { |
| _log.info("Created " + snippetHandler.getRecordCount() + " records from processing record " + id); |
| } |
| } catch (final Exception e) { |
| resultCollector.addFailedResult(id, e); |
| } |
| } |
| return resultCollector.getResultIds(); |
| } |
| |
| /** get XML input Stream. */ |
| private InputStream getXmlInputStream(final Blackboard blackboard, final String id, |
| final ParameterAccessor paramAccessor) throws IOException, BlackboardAccessException, MissingParameterException { |
| InputStream inputStream = null; |
| if (isReadFromAttribute(getInputType(paramAccessor))) { |
| inputStream = loadExternalInputStream(readStringInput(blackboard, id, paramAccessor)); |
| } else { |
| inputStream = blackboard.getAttachmentAsStream(id, getInputName(paramAccessor)); |
| } |
| return inputStream; |
| } |
| |
| /** |
| * Get the external InputStream to the given url or file path. |
| * |
| * @param attrtibuteValue |
| * the attrtibuteValue denoting an URL or file path |
| * @return a InputStream or null |
| * @throws IOException |
| * if any error occurs |
| */ |
| private InputStream loadExternalInputStream(final String attrtibuteValue) throws IOException { |
| InputStream stream = null; |
| if (attrtibuteValue != null && attrtibuteValue.trim().length() > 0) { |
| if (attrtibuteValue.startsWith("file")) { |
| final URL url = new URL(attrtibuteValue); |
| stream = new FileInputStream(url.getAuthority() + url.getPath()); |
| } else if (attrtibuteValue.startsWith("http")) { |
| final URL url = new URL(attrtibuteValue); |
| final HttpClient httpClient = new HttpClient(); |
| final GetMethod getMethod = new GetMethod(url.toString()); |
| httpClient.executeMethod(getMethod); |
| stream = getMethod.getResponseBodyAsStream(); |
| } else { |
| stream = new FileInputStream(attrtibuteValue); |
| } |
| } // if |
| return stream; |
| } |
| |
| class InternalHandler implements XmlSnippetHandler { |
| |
| private final Blackboard _blackboard; |
| |
| private final String _currentId; |
| |
| private final ResultCollector _resultCollector; |
| |
| private final String _outputName; |
| |
| private final boolean _storeInAttribute; |
| |
| private final AnyMap _cloneMetadata = DataFactory.DEFAULT.createAnyMap(); |
| |
| private int _recordCount; |
| |
| private InternalHandler(final Blackboard blackboard, final String currentId, final String outputName, |
| final boolean storeInAttribute, final ResultCollector resultCollector) throws BlackboardAccessException { |
| _blackboard = blackboard; |
| _currentId = currentId; |
| _outputName = outputName; |
| _storeInAttribute = storeInAttribute; |
| _resultCollector = resultCollector; |
| _cloneMetadata.putAll(_blackboard.getMetadata(_currentId)); |
| _cloneMetadata.remove(Record.RECORD_ID); |
| } |
| |
| public int getRecordCount() { |
| return _recordCount; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public void handleSnippet(final byte[] snippet) { |
| final String snippetId = _currentId + DocumentSplitterPipelet.SPLIT_ID_SEPARATOR + _recordCount++; |
| try { |
| final Record snippetRecord = _blackboard.getRecord(snippetId, Get.NEW); |
| snippetRecord.getMetadata().put(DocumentSplitterPipelet.DOCUMENT_ID, _currentId); |
| snippetRecord.getMetadata().putAll(_cloneMetadata); |
| if (_storeInAttribute) { |
| snippetRecord.getMetadata().put(_outputName, new String(snippet, ENCODING_CHARSET)); |
| } else { |
| _blackboard.setAttachment(snippetId, _outputName, snippet); |
| } |
| _resultCollector.addResult(snippetId); |
| } catch (final Exception ex) { |
| _log.warn("Error creating XML-snippet record", ex); |
| } |
| } |
| } |
| } |