blob: cadb10a0b52e81076c3fda0c911eeb9d41f37da2 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
* program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.tika.test;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.BlackboardFactory;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.WorkflowProcessor;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.tika.TikaPipelet;
import org.eclipse.smila.utils.config.ConfigUtils;
/**
 * Base class for tests that push a file through the Tika pipeline and inspect the extracted text.
 *
 * <p>
 * Each test gets a fresh transient blackboard. The file content is stored as attachment "Content" of a new record,
 * the pipeline named {@link #PIPELINE_NAME} is invoked, and the result is read either from the attribute or from the
 * attachment named by {@link #_outputAttribute}, depending on {@link #useAttributes()}.
 */
public abstract class ConverterPipelineTestBase extends DeclarativeServiceTestCase {
  /** name of pipeline to test. */
  protected static final String PIPELINE_NAME = "TikaPipeline";

  /** name of the record attribute that receives the file name. */
  protected static final String FILENAME_ATTRIBUTE = "FileName";

  /** name of the record attribute that receives the content type (only set if {@link #CONTENT_TYPE_PARAM} is given). */
  protected static final String CONTENT_TYPE_ATTRIBUTE = "ContentType";

  /** key in the additional parameters whose value is copied to {@link #CONTENT_TYPE_ATTRIBUTE}. */
  protected static final String CONTENT_TYPE_PARAM = "internContentTypeParam";

  /** WorkflowProcessor instance to test. */
  protected WorkflowProcessor _processor;

  /** The _blackboard. */
  protected Blackboard _blackboard;

  /** name of the attribute or attachment the extracted text is read from. */
  protected String _outputAttribute = "Text";

  /** look up the required services and create a transient blackboard for the test. */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _processor = getService(WorkflowProcessor.class);
    assertNotNull("no WorkflowProcessor service found.", _processor);
    final BlackboardFactory factory = getService(BlackboardFactory.class);
    assertNotNull("no BlackboardFactory service found.", factory);
    _blackboard = factory.createTransientBlackboard();
    assertNotNull("no Blackboard created", _blackboard);
  }

  /** drop the references obtained in {@link #setUp()}. */
  @Override
  protected void tearDown() throws Exception {
    _processor = null;
    _blackboard = null;
    super.tearDown();
  }

  /** overwrite this method if your test case should store the results in attachments. */
  protected boolean useAttributes() {
    return true;
  }

  /**
   * extract HTML or text content from given file.
   *
   * @param fileName
   *          name of the configuration resource (resolved via {@link ConfigUtils}) to convert.
   * @param additionalRecordParams
   *          parameters to attach to the record, may be null.
   * @return the extracted content, never null (asserted).
   * @throws Exception
   *           any error from reading the resource or running the pipeline; it is propagated unchanged so that the
   *           test runner reports it.
   */
  protected String executeTest(final String fileName, final AnyMap additionalRecordParams) throws Exception {
    BufferedInputStream input = null;
    try {
      input = new BufferedInputStream(ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName));
      final String result = extract(fileName, input, additionalRecordParams);
      assertNotNull(result);
      return result;
    } finally {
      // no catch/printStackTrace here: the exception reaches the test runner, which reports it itself.
      IOUtils.closeQuietly(input);
    }
  }

  /**
   * call Tika pipeline.
   *
   * @param fileName
   *          file name to store in the record.
   * @param inputStream
   *          content to convert.
   * @param additionalParams
   *          parameters to attach to the record, may be null.
   * @return the record IDs returned by the pipeline, never null or empty (asserted).
   * @throws Exception
   *           error preparing the record or processing the pipeline.
   */
  protected String[] callPipeline(final String fileName, final InputStream inputStream,
    final AnyMap additionalParams) throws Exception {
    final String request = initBlackboardRecord("source", fileName, inputStream, additionalParams);
    final String[] result = _processor.process(PIPELINE_NAME, _blackboard, new String[] { request });
    // fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
    assertNotNull("pipeline returned no result", result);
    assertTrue("pipeline returned an empty result", result.length > 0);
    // additionalParams may be null (initBlackboardRecord accepts that), so guard before reading it.
    final boolean expectDifferentRecord =
      additionalParams != null && additionalParams.containsKey(TikaPipelet.PROP_PAGE_BREAK)
        && additionalParams.getBooleanValue(TikaPipelet.PROP_PAGE_BREAK)
        && !additionalParams.containsKey(TikaPipelet.PROP_PARTS_ATTRIBUTE);
    if (expectDifferentRecord) {
      // NOTE(review): assertNotSame compares references, not string content - confirm whether a check for
      // unequal IDs was intended here.
      assertNotSame(request, result[0]);
    } else {
      assertEquals(request, result[0]);
    }
    return result;
  }

  /**
   * call Tika pipeline for extracting text from inputStream content.
   *
   * @param fileName
   *          file name to store in the record.
   * @param inputStream
   *          content to convert.
   * @param additionalParams
   *          parameters to attach to the record, may be null.
   * @return value of attribute {@link #_outputAttribute} if {@link #useAttributes()} is true, else the content of the
   *         attachment of that name decoded as UTF-8.
   * @throws Exception
   *           error processing the pipeline or reading the result.
   */
  protected String extract(final String fileName, final InputStream inputStream, final AnyMap additionalParams)
    throws Exception {
    final String[] result = callPipeline(fileName, inputStream, additionalParams);
    assertEquals(1, result.length);
    final String resultId = result[0];
    final AnyMap metadata = _blackboard.getMetadata(resultId);
    assertResultMetadata(metadata);
    if (useAttributes()) {
      return metadata.getStringValue(_outputAttribute);
    }
    return new String(_blackboard.getAttachmentAsBytes(resultId, _outputAttribute), "utf-8");
  }

  /** check result record. overwrite in subclass if appropriate for test. */
  protected void assertResultMetadata(final AnyMap metadata) {
    // do nothing by default.
  }

  /**
   * create a new record on the blackboard with an attachment.
   *
   * <p>
   * The entries of additionalParams are copied to the "_parameters" map of the record. If the test reads its result
   * from an attachment ({@link #useAttributes()} is false) and additionalParams does not define "outputType",
   * "outputType" = "ATTACHMENT" is added to the record parameters. The given additionalParams map itself is not
   * modified.
   *
   * @param source
   *          source name of the new record.
   * @param fileName
   *          file name, used to build the record ID ("key:" + fileName) and stored in {@link #FILENAME_ATTRIBUTE}.
   * @param inputStream
   *          content to store as attachment "Content".
   * @param additionalParams
   *          parameters to attach to the record, may be null (then no parameters are set at all).
   * @return ID of the created record.
   * @throws Exception
   *           error accessing the blackboard.
   */
  protected String initBlackboardRecord(final String source, final String fileName, final InputStream inputStream,
    final AnyMap additionalParams) throws Exception {
    final Record record = DataFactory.DEFAULT.createRecord("key:" + fileName, source);
    record.getMetadata().put(FILENAME_ATTRIBUTE, fileName);
    if (additionalParams != null) {
      if (additionalParams.containsKey(CONTENT_TYPE_PARAM)) {
        record.getMetadata().put(CONTENT_TYPE_ATTRIBUTE, additionalParams.get(CONTENT_TYPE_PARAM));
      }
      final boolean forceAttachmentOutput = !useAttributes() && !additionalParams.containsKey("outputType");
      // create the "_parameters" map only if there is something to put into it.
      if (forceAttachmentOutput || !additionalParams.isEmpty()) {
        final AnyMap parameters = record.getMetadata().getMap("_parameters", true);
        for (final Map.Entry<String, Any> mapEntry : additionalParams.entrySet()) {
          parameters.put(mapEntry.getKey(), mapEntry.getValue());
        }
        if (forceAttachmentOutput) {
          // set the default on the record only, so the caller's map stays untouched.
          parameters.put("outputType", "ATTACHMENT");
        }
      }
    }
    // remove a record with the same ID that may be left on the blackboard before setting the new one.
    _blackboard.unloadRecord(record.getId());
    _blackboard.setRecord(record);
    _blackboard.setAttachmentFromStream(record.getId(), "Content", inputStream);
    return record.getId();
  }
}