| /******************************************************************************* |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This |
| * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which |
| * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.smila.tika.test; |
| |
| import java.io.BufferedInputStream; |
| import java.io.InputStream; |
| import java.util.Map; |
| |
| import org.apache.commons.io.IOUtils; |
| import org.eclipse.smila.blackboard.Blackboard; |
| import org.eclipse.smila.blackboard.BlackboardFactory; |
| import org.eclipse.smila.datamodel.Any; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.DataFactory; |
| import org.eclipse.smila.datamodel.Record; |
| import org.eclipse.smila.processing.WorkflowProcessor; |
| import org.eclipse.smila.test.DeclarativeServiceTestCase; |
| import org.eclipse.smila.tika.TikaPipelet; |
| import org.eclipse.smila.utils.config.ConfigUtils; |
| |
| public abstract class ConverterPipelineTestBase extends DeclarativeServiceTestCase { |
| |
| /** name of pipeline to test. */ |
| protected static final String PIPELINE_NAME = "TikaPipeline"; |
| |
| protected static final String FILENAME_ATTRIBUTE = "FileName"; |
| |
| protected static final String CONTENT_TYPE_ATTRIBUTE = "ContentType"; |
| |
| protected static final String CONTENT_TYPE_PARAM = "internContentTypeParam"; |
| |
| /** WorkflowProcessor instance to test. */ |
| protected WorkflowProcessor _processor; |
| |
| /** The _blackboard. */ |
| protected Blackboard _blackboard; |
| |
| /** output attribute. */ |
| protected String _outputAttribute = "Text"; |
| |
| @Override |
| protected void setUp() throws Exception { |
| super.setUp(); |
| _processor = getService(WorkflowProcessor.class); |
| assertNotNull("no WorkflowProcessor service found.", _processor); |
| final BlackboardFactory factory = getService(BlackboardFactory.class); |
| assertNotNull("no BlackboardFactory service found.", factory); |
| _blackboard = factory.createTransientBlackboard(); |
| assertNotNull("no Blackboard created", _blackboard); |
| } |
| |
| @Override |
| protected void tearDown() throws Exception { |
| _processor = null; |
| _blackboard = null; |
| super.tearDown(); |
| } |
| |
| /** overwrite this method if oyur test case should store the results in attachments. */ |
| protected boolean useAttributes() { |
| return true; |
| } |
| |
| /** extract HTML or text content from given file. */ |
| protected String executeTest(final String fileName, final AnyMap additionalRecordParams) throws Exception { |
| BufferedInputStream input = null; |
| try { |
| input = new BufferedInputStream(ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)); |
| final String result = extract(fileName, input, additionalRecordParams); |
| assertNotNull(result); |
| return result; |
| } catch (final Exception e) { |
| e.printStackTrace(); |
| throw e; |
| } finally { |
| IOUtils.closeQuietly(input); |
| } |
| } |
| |
| /** call Tika pipeline. */ |
| protected String[] callPipeline(final String fileName, final InputStream inputStream, |
| final AnyMap additionalParams) throws Exception { |
| final String request = initBlackboardRecord("source", fileName, inputStream, additionalParams); |
| final String[] result = _processor.process(PIPELINE_NAME, _blackboard, new String[] { request }); |
| |
| if (additionalParams.containsKey(TikaPipelet.PROP_PAGE_BREAK) |
| && additionalParams.getBooleanValue(TikaPipelet.PROP_PAGE_BREAK) |
| && !additionalParams.containsKey(TikaPipelet.PROP_PARTS_ATTRIBUTE)) { |
| assertNotSame(request, result[0]); |
| } else { |
| assertEquals(request, result[0]); |
| } |
| return result; |
| } |
| |
| /** call Tika pipeline for extracting text from inputStream content. */ |
| protected String extract(final String fileName, final InputStream inputStream, final AnyMap additionalParams) |
| throws Exception { |
| final String[] result = callPipeline(fileName, inputStream, additionalParams); |
| assertEquals(1, result.length); |
| |
| assertResultMetadata(_blackboard.getMetadata(result[0])); |
| if (useAttributes()) { |
| return _blackboard.getMetadata(result[0]).getStringValue(_outputAttribute); |
| } else { |
| return new String(_blackboard.getAttachmentAsBytes(result[0], _outputAttribute), "utf-8"); |
| } |
| } |
| |
| /** check result record. overwrite in subclass if appropriate for test. */ |
| protected void assertResultMetadata(final AnyMap metadata) { |
| // do nothing by default. |
| } |
| |
| /** create a new record on the blackboard with an attachment. */ |
| protected String initBlackboardRecord(final String source, final String fileName, final InputStream inputStream, |
| final AnyMap additionalParams) throws Exception { |
| final Record record = DataFactory.DEFAULT.createRecord("key:" + fileName, source); |
| record.getMetadata().put(FILENAME_ATTRIBUTE, fileName); |
| if (additionalParams != null) { |
| if (additionalParams.containsKey(CONTENT_TYPE_PARAM)) { |
| record.getMetadata().put(CONTENT_TYPE_ATTRIBUTE, additionalParams.get(CONTENT_TYPE_PARAM)); |
| } |
| if (!useAttributes() && !additionalParams.containsKey("outputType")) { |
| additionalParams.put("outputType", "ATTACHMENT"); |
| } |
| for (final Map.Entry<String, Any> mapEntry : additionalParams.entrySet()) { |
| record.getMetadata().getMap("_parameters", true).put(mapEntry.getKey(), mapEntry.getValue()); |
| } |
| } |
| _blackboard.unloadRecord(record.getId()); |
| _blackboard.setRecord(record); |
| _blackboard.setAttachmentFromStream(record.getId(), "Content", inputStream); |
| return record.getId(); |
| } |
| } |