/*******************************************************************************
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors: Tobias Liefke - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.processing.pipelets.boilerpipe.test; | |
import java.io.File; | |
import org.eclipse.smila.blackboard.Blackboard.Get; | |
import org.eclipse.smila.blackboard.BlackboardAccessException; | |
import org.eclipse.smila.datamodel.AnyMap; | |
import org.eclipse.smila.datamodel.Record; | |
import org.eclipse.smila.processing.ProcessingException; | |
import org.eclipse.smila.processing.pipelets.boilerpipe.BoilerpipePipelet; | |
import org.eclipse.smila.processing.pipelets.test.ATransformationPipeletTest; | |
/** | |
* Test the {@link BoilerpipePipelet}. | |
* | |
* @author Tobias Liefke | |
*/ | |
public class TestBoilerpipePipelet extends ATransformationPipeletTest { | |
/** The ID of the test record. */ | |
private static final String TEST_ID = "testId"; | |
/** | |
* Test execution with a call to the current java executable. | |
* | |
* @throws Exception | |
* if the test fails | |
*/ | |
public void testDefaultExtractor() throws Exception { | |
final AnyMap config = createTestConfig(); | |
final String[] result = configureAndExecute(config); | |
// Check the result | |
assertNotNull(result); | |
assertEquals(1, result.length); | |
final String resultValue = _blackboard.getMetadata(result[0]).getStringValue("output"); | |
assertNotNull(resultValue); | |
assertTrue(resultValue.contains("Mailing Lists")); | |
assertTrue(resultValue.contains("big data")); | |
} | |
/** | |
* Test execution with a call to the current java executable. | |
* | |
* @throws Exception | |
* if the test fails | |
*/ | |
public void testSentencesExtractor() throws Exception { | |
final AnyMap config = createTestConfig(); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.extractors.ArticleSentencesExtractor.INSTANCE"); | |
final String[] resultIds = configureAndExecute(config); | |
// Check the result | |
assertNotNull(resultIds); | |
assertEquals(1, resultIds.length); | |
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output"); | |
assertNotNull(resultValue); | |
assertFalse(resultValue.contains("Mailing Lists")); | |
assertTrue(resultValue.contains("big data")); | |
} | |
/** | |
* Test execution with a call to the current java executable. | |
* | |
* @throws Exception | |
* if the test fails | |
*/ | |
public void testMultipleFilters() throws Exception { | |
final AnyMap config = createTestConfig(); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.MarkEverythingContentFilter.INSTANCE"); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.SplitParagraphBlocksFilter.INSTANCE"); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.english.TerminatingBlocksFinder.INSTANCE"); | |
final String[] resultIds = configureAndExecute(config); | |
// Check the result | |
assertNotNull(resultIds); | |
assertEquals(1, resultIds.length); | |
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output"); | |
assertNotNull(resultValue); | |
} | |
/** | |
* Test for maxTextBlocks parameter. | |
*/ | |
public void testMaxTextBlocks() throws Exception { | |
final int[] maxTestBlockSettings = { -1, 0, 2, 4, 100 }; | |
for (final int maxTextBlocks : maxTestBlockSettings) { | |
final AnyMap config = createTestConfigMaxTextBlocks(maxTextBlocks); | |
final String[] resultIds = configureAndExecute(config); | |
checkMaxTextBlocksResult(resultIds, maxTextBlocks); | |
} | |
final int[] maxTestBlockSettings2 = { 0, -1, 2, 4, 100 }; // "0" delivers a result here - for whatever reason | |
for (final int maxTextBlocks : maxTestBlockSettings2) { | |
final AnyMap config = createTestConfigMaxTextBlocks(maxTextBlocks); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.MarkEverythingContentFilter.INSTANCE"); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.simple.SplitParagraphBlocksFilter.INSTANCE"); | |
config.getSeq("filter", true).add("de.l3s.boilerpipe.filters.english.TerminatingBlocksFinder.INSTANCE"); | |
final String[] resultIds = configureAndExecute(config); | |
checkMaxTextBlocksResult(resultIds, maxTextBlocks); | |
} | |
} | |
/** check the result for activated and deactivted maxTestBlocks parameter. */ | |
private void checkMaxTextBlocksResult(final String[] resultIds, final int maxTextBlocks) throws Exception { | |
assertNotNull(resultIds); | |
assertEquals(1, resultIds.length); | |
final String resultValue = _blackboard.getMetadata(resultIds[0]).getStringValue("output"); | |
final boolean deactivated = maxTextBlocks < 0; | |
final String failMessage = "test failed with maxTextBlocks=" + maxTextBlocks; | |
if (maxTextBlocks == 0) { | |
assertTrue(failMessage, resultValue == null || resultValue.isEmpty()); | |
return; | |
} | |
assertNotNull(failMessage, resultValue); | |
if (deactivated || maxTextBlocks > 0) { | |
assertTrue(failMessage, resultValue.contains("MyText1")); | |
} else { | |
assertFalse(failMessage, resultValue.contains("MyText1")); | |
} | |
if (deactivated || maxTextBlocks > 1) { | |
assertTrue(failMessage, resultValue.contains("MyText2")); | |
} else { | |
assertFalse(failMessage, resultValue.contains("MyText2")); | |
} | |
if (deactivated || maxTextBlocks > 2) { | |
assertTrue(failMessage, resultValue.contains("MyText3")); | |
} else { | |
assertFalse(failMessage, resultValue.contains("MyText3")); | |
} | |
} | |
/** | |
* Create test configuration. | |
* | |
* @return the test configuration | |
*/ | |
private AnyMap createTestConfig() { | |
final AnyMap config = _blackboard.getDataFactory().createAnyMap(); | |
config.put("inputName", "input"); | |
config.put("inputType", "ATTACHMENT"); | |
config.put("outputName", "output"); | |
config.put("outputType", "ATTRIBUTE"); | |
return config; | |
} | |
/** | |
* @return the test configuration for maxTestBlock tests | |
*/ | |
private AnyMap createTestConfigMaxTextBlocks(final int maxTextBlocks) { | |
final AnyMap config = _blackboard.getDataFactory().createAnyMap(); | |
config.put("inputName", "input-small"); | |
config.put("inputType", "ATTACHMENT"); | |
config.put("outputName", "output"); | |
config.put("outputType", "ATTRIBUTE"); | |
config.put(BoilerpipePipelet.MAX_PARSER_BLOCKS_PROPERTY, maxTextBlocks); | |
return config; | |
} | |
/** | |
* Create, configure and execute the Pipelet. | |
*/ | |
private String[] configureAndExecute(final AnyMap config) throws ProcessingException, BlackboardAccessException { | |
final BoilerpipePipelet pipelet = new BoilerpipePipelet(); | |
pipelet.configure(config); | |
// Create the test record | |
final Record record = _blackboard.getRecord(TEST_ID, Get.NEW); | |
_blackboard.setRecord(record); | |
_blackboard.setAttachmentFromFile(TEST_ID, "input", new File("./configuration/data/test.html")); | |
_blackboard.setAttachmentFromFile(TEST_ID, "input-small", new File("./configuration/data/test-small.html")); | |
// Execute the pipelet | |
return pipelet.process(_blackboard, new String[] { record.getId() }); | |
} | |
} |