blob: 207406289be92b354ccaf3c79178620574a86677 [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.tika.test;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.Blackboard.Get;
import org.eclipse.smila.blackboard.BlackboardFactory;
import org.eclipse.smila.common.language.LanguageIdentifyService;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.pipelets.LanguageIdentifyPipelet;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
/**
 * Tests the LanguageIdentifyPipelet.
 *
 * Each test creates a record holding an optional text attribute, attaches the pipelet parameters to the record
 * metadata, runs the pipelet on a blackboard and then checks which language and alternative name attributes were
 * written to the record.
 */
public class TestLanguageIdentifyPipelet extends DeclarativeServiceTestCase {

  /** name of the record attribute the tests use for the identified language. */
  public static final String LANGUAGE = "Language";

  /** name of the record attribute the tests use for the alternative language name. */
  public static final String ALT_NAME = "AltName";

  /** name of the record attribute holding the text to analyze. */
  private static final String CONTENT_ATTRIBUTE_NAME = "Text";

  /** metadata key under which the pipelet parameters are attached to each record. */
  private static final String PARAMETERS_KEY = "_parameters";

  /** prefix put in front of every test record id. */
  private static final String ID_PREFIX = "testDataSource:";

  /** sample text used by all tests that provide content. */
  private static final String ENGLISH_CONTENT = "The empire strikes back.";

  /** the BlackboardService. */
  private Blackboard _blackboard;

  /** the LanguageIdentifyPipelet. */
  private LanguageIdentifyPipelet _lip;

  /**
   * Creates a fresh blackboard and pipelet for each test.
   *
   * @throws Exception
   *           if a required service cannot be obtained or the pipelet cannot be configured.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    final BlackboardFactory factory = getService(BlackboardFactory.class);
    assertNotNull("no BlackboardFactory service found.", factory);
    _blackboard = factory.createPersistingBlackboard();
    assertNotNull("no Blackboard created", _blackboard);

    // the service is not used directly here; the lookup only verifies that it is available before the pipelet runs.
    final LanguageIdentifyService lis = getService(LanguageIdentifyService.class);
    assertNotNull("no LanguageIdentifyService service found.", lis);

    // no static configuration: all parameters are passed per record, see doProcess().
    _lip = new LanguageIdentifyPipelet();
    _lip.configure(null);
  }

  /**
   * Releases the references to the test fixture.
   *
   * @throws Exception
   *           if the base class tear down fails.
   */
  @Override
  protected void tearDown() throws Exception {
    try {
      _blackboard = null;
      _lip = null;
    } finally {
      super.tearDown();
    }
  }

  /**
   * Neither a language attribute nor an alternative name attribute is configured: processing must fail with a
   * ProcessingException.
   */
  public void testNoOutput() throws Exception {
    final AnyMap configuration = createConfiguration(null, null, null, null, false);
    final Record record = createRecord("testId0", ENGLISH_CONTENT);
    try {
      doProcess(configuration, record);
      fail("expected exception");
    } catch (final ProcessingException e) {
      // expected
    }
  }

  /** Both output attributes are configured: language and alternative name must be written. */
  public void testLanguageIdentification() throws Exception {
    final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false);
    final Record record = createRecord("testId1", ENGLISH_CONTENT);
    doProcess(configuration, record);
    assertLanguage(record.getId(), "en", "english");
  }

  /** Only the language attribute is configured: no alternative name must be written. */
  public void testLanguageOnly() throws Exception {
    final AnyMap configuration = createConfiguration(LANGUAGE, null, null, null, false);
    final Record record = createRecord("testId2", ENGLISH_CONTENT);
    doProcess(configuration, record);
    assertLanguage(record.getId(), "en", null);
  }

  /** Only the alternative name attribute is configured: no language must be written. */
  public void testAltNameOnly() throws Exception {
    final AnyMap configuration = createConfiguration(null, null, ALT_NAME, null, false);
    final Record record = createRecord("testId7", ENGLISH_CONTENT);
    doProcess(configuration, record);
    assertLanguage(record.getId(), null, "english");
  }

  /** Only certain languages are accepted and no defaults are set: nothing must be written. */
  public void testCertainLanguages() throws Exception {
    final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, true);
    final Record record = createRecord("testId3", ENGLISH_CONTENT);
    doProcess(configuration, record);
    // language is not certain enough
    assertLanguage(record.getId(), null, null);
  }

  /** The record has no text attribute and no defaults are set: nothing must be written. */
  public void testNoInput() throws Exception {
    final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false);
    final Record record = createRecord("testId4", null);
    doProcess(configuration, record);
    assertLanguage(record.getId(), null, null);
  }

  /** The record has an empty text attribute and no defaults are set: nothing must be written. */
  public void testEmptyInput() throws Exception {
    final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false);
    final Record record = createRecord("testId5", "");
    doProcess(configuration, record);
    assertLanguage(record.getId(), null, null);
  }

  /** The record has an empty text attribute: the configured default values must be written. */
  public void testNoInputDefaultLanguage() throws Exception {
    final String expectedLanguage = "plz";
    final String expectedAlternativeName = "pfälzisch";
    final AnyMap configuration =
      createConfiguration(LANGUAGE, expectedLanguage, ALT_NAME, expectedAlternativeName, false);
    final Record record = createRecord("testId6", "");
    doProcess(configuration, record);
    assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName);
  }

  /** Only certain languages are accepted: the configured default values must be written for the sample text. */
  public void testNotCertainDefaultLanguage() throws Exception {
    final String expectedLanguage = "plz";
    final String expectedAlternativeName = "pfälzisch";
    final AnyMap configuration =
      createConfiguration(LANGUAGE, expectedLanguage, ALT_NAME, expectedAlternativeName, true);
    final Record record = createRecord("testId8", ENGLISH_CONTENT);
    doProcess(configuration, record);
    assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName);
  }

  /**
   * Checks the language attributes of a record on the blackboard.
   *
   * @param id
   *          id of the record to check.
   * @param expectedLanguage
   *          expected value of the {@link #LANGUAGE} attribute, null if the attribute must not be set.
   * @param expectedAlternativeName
   *          expected value of the {@link #ALT_NAME} attribute, null if the attribute must not be set.
   * @throws Exception
   *           if the record metadata cannot be read.
   */
  private void assertLanguage(final String id, final String expectedLanguage, final String expectedAlternativeName)
    throws Exception {
    final AnyMap metadata = _blackboard.getMetadata(id);
    assertEquals(expectedLanguage, metadata.getStringValue(LANGUAGE));
    assertEquals(expectedAlternativeName, metadata.getStringValue(ALT_NAME));
  }

  /**
   * Attaches the configuration to the record, puts the record on the blackboard and runs the pipelet on it.
   *
   * @param configuration
   *          pipelet parameters, stored in the record metadata under the parameters key.
   * @param record
   *          the record to process.
   * @throws Exception
   *           if the pipelet fails to process the record.
   */
  private void doProcess(final AnyMap configuration, final Record record) throws Exception {
    final String[] recordIds = new String[] { record.getId() };
    record.getMetadata().put(PARAMETERS_KEY, configuration);
    _blackboard.setRecord(record);
    final String[] result = _lip.process(_blackboard, recordIds);
    assertNotNull(result);
    // one result id is expected for each input id.
    assertEquals(recordIds.length, result.length);
  }

  /**
   * Creates the pipelet parameters. The content attribute is always set, every other parameter is only added if a
   * value is given.
   *
   * @param languageAttribute
   *          name of the attribute to write the language to, may be null.
   * @param defaultLanguage
   *          default language, may be null.
   * @param alternativeNameAttribute
   *          name of the attribute to write the alternative name to, may be null.
   * @param defaultAlternativeName
   *          default alternative name, may be null.
   * @param useCertainLanguagesOnly
   *          if true, the corresponding pipelet flag is set; if false the parameter is omitted.
   * @return the parameter map.
   */
  private AnyMap createConfiguration(final String languageAttribute, final String defaultLanguage,
    final String alternativeNameAttribute, final String defaultAlternativeName,
    final boolean useCertainLanguagesOnly) {
    final AnyMap configuration = _blackboard.getDataFactory().createAnyMap();
    configuration.put(LanguageIdentifyPipelet.CONTENT_ATTRIBUTE, CONTENT_ATTRIBUTE_NAME);
    if (languageAttribute != null) {
      configuration.put(LanguageIdentifyPipelet.LANGUAGE_ATTRIBUTE, languageAttribute);
    }
    if (defaultLanguage != null) {
      configuration.put(LanguageIdentifyPipelet.DEFAULT_LANGUAGE, defaultLanguage);
    }
    if (alternativeNameAttribute != null) {
      configuration.put(LanguageIdentifyPipelet.ALTERNATIVE_NAME_ATTRIBUTE, alternativeNameAttribute);
    }
    if (defaultAlternativeName != null) {
      configuration.put(LanguageIdentifyPipelet.DEFAULT_ALTERNATIVE_NAME, defaultAlternativeName);
    }
    if (useCertainLanguagesOnly) {
      configuration.put(LanguageIdentifyPipelet.USE_CERTAIN_LANGUAGES_ONLY, useCertainLanguagesOnly);
    }
    return configuration;
  }

  /**
   * Gets a record with the given id from the blackboard, creating it if necessary, and sets its text attribute.
   *
   * @param idValue
   *          id of the record without the data source prefix.
   * @param content
   *          text to analyze; if null, no text attribute is set.
   * @return the record.
   * @throws Exception
   *           if the record cannot be obtained from the blackboard.
   */
  private Record createRecord(final String idValue, final String content) throws Exception {
    final Record record = _blackboard.getRecord(ID_PREFIX + idValue, Get.AUTO_CREATE);
    if (content != null) {
      record.getMetadata().put(CONTENT_ATTRIBUTE_NAME, content);
    }
    return record;
  }
}