blob: 5133cb6f9727da01994396ec9157c7b528754fd9 [file] [log] [blame]
/*******************************************************************************
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Tobias Liefke - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.processing.pipelets.boilerpipe.test;
import java.io.File;
import org.eclipse.smila.blackboard.Blackboard.Get;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet;
import org.eclipse.smila.processing.pipelets.test.ATransformationPipeletTest;
/**
* Test the {@link BoilerpipePipelet}.
*
* @author Tobias Liefke
*/
public class TestBoilerpipePipelet extends ATransformationPipeletTest {
/** The ID of the test record. */
private static final String TEST_ID = "testId";
/**
* Test execution with a call to the current java executable.
*
* @throws Exception
* if the test fails
*/
public void testDefaultExtractor() throws Exception {
final AnyMap config = createTestConfig();
final String[] result = configureAndExecute(config);
// Check the result
assertNotNull(result);
assertEquals(1, result.length);
final String resultValue = _blackboard.getMetadata(result[0]).getStringValue("output");
assertNotNull(resultValue);
assertTrue(resultValue.contains("Mailing Lists"));
assertTrue(resultValue.contains("big data"));
}
/**
* Test execution with a call to the current java executable.
*
* @throws Exception
* if the test fails
*/
public void testSentencesExtractor() throws Exception {
final AnyMap config = createTestConfig();
config.getSeq("filter", true).add("de.l3s.boilerpipe.extractors.ArticleSentencesExtractor.INSTANCE");
final String[] resultIds = configureAndExecute(config);
// Check the result
assertNotNull(resultIds);
assertEquals(1, resultIds.length);
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output");
assertNotNull(resultValue);
assertFalse(resultValue.contains("Mailing Lists"));
assertTrue(resultValue.contains("big data"));
}
/**
* Test execution with a call to the current java executable.
*
* @throws Exception
* if the test fails
*/
public void testMultipleFilters() throws Exception {
final AnyMap config = createTestConfig();
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.MarkEverythingContentFilter.INSTANCE");
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.SplitParagraphBlocksFilter.INSTANCE");
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.english.TerminatingBlocksFinder.INSTANCE");
final String[] resultIds = configureAndExecute(config);
// Check the result
assertNotNull(resultIds);
assertEquals(1, resultIds.length);
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output");
assertNotNull(resultValue);
}
/**
* Test for maxTextBlocks parameter.
*/
public void testMaxTextBlocks() throws Exception {
final int[] maxTestBlockSettings = { -1, 0, 2, 4, 100 };
for (final int maxTextBlocks : maxTestBlockSettings) {
final AnyMap config = createTestConfigMaxTextBlocks(maxTextBlocks);
final String[] resultIds = configureAndExecute(config);
checkMaxTextBlocksResult(resultIds, maxTextBlocks);
}
final int[] maxTestBlockSettings2 = { 0, -1, 2, 4, 100 }; // "0" delivers a result here - for whatever reason
for (final int maxTextBlocks : maxTestBlockSettings2) {
final AnyMap config = createTestConfigMaxTextBlocks(maxTextBlocks);
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.MarkEverythingContentFilter.INSTANCE");
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.SplitParagraphBlocksFilter.INSTANCE");
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.english.TerminatingBlocksFinder.INSTANCE");
final String[] resultIds = configureAndExecute(config);
checkMaxTextBlocksResult(resultIds, maxTextBlocks);
}
}
/** check the result for activated and deactivted maxTestBlocks parameter. */
private void checkMaxTextBlocksResult(final String[] resultIds, final int maxTextBlocks) throws Exception {
assertNotNull(resultIds);
assertEquals(1, resultIds.length);
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output");
final boolean deactivated = maxTextBlocks < 0;
final String failMessage = "test failed with maxTextBlocks=" + maxTextBlocks;
if (maxTextBlocks == 0) {
assertTrue(failMessage, resultValue == null || resultValue.isEmpty());
return;
}
assertNotNull(failMessage, resultValue);
if (deactivated || maxTextBlocks > 0) {
assertTrue(failMessage, resultValue.contains("MyText1"));
} else {
assertFalse(failMessage, resultValue.contains("MyText1"));
}
if (deactivated || maxTextBlocks > 1) {
assertTrue(failMessage, resultValue.contains("MyText2"));
} else {
assertFalse(failMessage, resultValue.contains("MyText2"));
}
if (deactivated || maxTextBlocks > 2) {
assertTrue(failMessage, resultValue.contains("MyText3"));
} else {
assertFalse(failMessage, resultValue.contains("MyText3"));
}
}
/**
* Create test configuration.
*
* @return the test configuration
*/
private AnyMap createTestConfig() {
final AnyMap config = _blackboard.getDataFactory().createAnyMap();
config.put("inputName", "input");
config.put("inputType", "ATTACHMENT");
config.put("outputName", "output");
config.put("outputType", "ATTRIBUTE");
return config;
}
/**
* @return the test configuration for maxTestBlock tests
*/
private AnyMap createTestConfigMaxTextBlocks(final int maxTextBlocks) {
final AnyMap config = _blackboard.getDataFactory().createAnyMap();
config.put("inputName", "input-small");
config.put("inputType", "ATTACHMENT");
config.put("outputName", "output");
config.put("outputType", "ATTRIBUTE");
config.put(BoilerpipePipelet.MAX_PARSER_BLOCKS_PROPERTY, maxTextBlocks);
return config;
}
/**
* Create, configure and execute the Pipelet.
*/
private String[] configureAndExecute(final AnyMap config) throws ProcessingException, BlackboardAccessException {
final BoilerpipePipelet pipelet = new BoilerpipePipelet();
pipelet.configure(config);
// Create the test record
final Record record = _blackboard.getRecord(TEST_ID, Get.NEW);
_blackboard.setRecord(record);
_blackboard.setAttachmentFromFile(TEST_ID, "input", new File("./configuration/data/test.html"));
_blackboard.setAttachmentFromFile(TEST_ID, "input-small", new File("./configuration/data/test-small.html"));
// Execute the pipelet
return pipelet.process(_blackboard, new String[] { record.getId() });
}
}