| /******************************************************************************* |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. |
| * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.smila.tika.test; |
| |
| import org.eclipse.smila.blackboard.Blackboard; |
| import org.eclipse.smila.blackboard.Blackboard.Get; |
| import org.eclipse.smila.blackboard.BlackboardFactory; |
| import org.eclipse.smila.common.language.LanguageIdentifyService; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.Record; |
| import org.eclipse.smila.processing.ProcessingException; |
| import org.eclipse.smila.processing.pipelets.LanguageIdentifyPipelet; |
| import org.eclipse.smila.test.DeclarativeServiceTestCase; |
| |
| /** |
| * Test the LanguageIdentifyPipelet. |
| */ |
| public class TestLanguageIdentifyPipelet extends DeclarativeServiceTestCase { |
| |
| public static final String LANGUAGE = "Language"; |
| |
| public static final String ALT_NAME = "AltName"; |
| |
| /** the BlackboardService. */ |
| private Blackboard _blackboard; |
| |
| /** the LanguageIdentifyPipelet. */ |
| private LanguageIdentifyPipelet _lip; |
| |
| @Override |
| protected void setUp() throws Exception { |
| final BlackboardFactory factory = getService(BlackboardFactory.class); |
| assertNotNull("no BlackboardFactory service found.", factory); |
| _blackboard = factory.createPersistingBlackboard(); |
| assertNotNull("no Blackboard created", _blackboard); |
| final LanguageIdentifyService lis = getService(LanguageIdentifyService.class); |
| assertNotNull("no MimeTypeIdentifier service found.", lis); |
| _lip = new LanguageIdentifyPipelet(); |
| assertNotNull(_lip); |
| _lip.configure(null); |
| } |
| |
| @Override |
| protected void tearDown() throws Exception { |
| _blackboard = null; |
| _lip = null; |
| } |
| |
| public void testNoOutput() throws Exception { |
| final String id = "testId0"; |
| final String content = "The empire strikes back."; |
| final AnyMap configuration = createConfiguration(null, null, null, null, false); |
| final Record record = createRecord(id, content); |
| |
| try { |
| doProcess(configuration, record); |
| fail("expected exception"); |
| } catch (final ProcessingException e) { |
| // expected |
| } |
| } |
| |
| public void testLanguageIdentification() throws Exception { |
| final String id = "testId1"; |
| final String content = "The empire strikes back."; |
| final String expectedLanguage = "en"; |
| final String expectedAlternativeName = "english"; |
| |
| final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testLanguageOnly() throws Exception { |
| final String id = "testId1"; |
| final String content = "The empire strikes back."; |
| final String expectedLanguage = "en"; |
| final String expectedAlternativeName = null; |
| |
| final AnyMap configuration = createConfiguration(LANGUAGE, null, null, null, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testAltNameOnly() throws Exception { |
| final String id = "testId1"; |
| final String content = "The empire strikes back."; |
| final String expectedLanguage = null; |
| final String expectedAlternativeName = "english"; |
| |
| final AnyMap configuration = createConfiguration(null, null, ALT_NAME, null, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testCertainLanguages() throws Exception { |
| final String id = "testId3"; |
| final String content = "The empire strikes back."; |
| final String expectedLanguage = null; // language is not certain enough |
| final String expectedAlternativeName = null; |
| |
| final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, true); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testNoInput() throws Exception { |
| final String id = "testId4"; |
| final String content = null; |
| final String expectedLanguage = null; // no input |
| final String expectedAlternativeName = null; |
| |
| final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testEmptyInput() throws Exception { |
| final String id = "testId5"; |
| final String content = ""; |
| final String expectedLanguage = null; // empty input |
| final String expectedAlternativeName = null; |
| |
| final AnyMap configuration = createConfiguration(LANGUAGE, null, ALT_NAME, null, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testNoInputDefaultLanguage() throws Exception { |
| final String id = "testId6"; |
| final String content = ""; |
| final String expectedLanguage = "plz"; |
| final String expectedAlternativeName = "pfälzisch"; |
| |
| final AnyMap configuration = |
| createConfiguration(LANGUAGE, expectedLanguage, ALT_NAME, expectedAlternativeName, false); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| public void testNotCertainDefaultLanguage() throws Exception { |
| final String id = "testId6"; |
| final String content = "The empire strikes back."; |
| final String expectedLanguage = "plz"; |
| final String expectedAlternativeName = "pfälzisch"; |
| |
| final AnyMap configuration = |
| createConfiguration(LANGUAGE, expectedLanguage, ALT_NAME, expectedAlternativeName, true); |
| final Record record = createRecord(id, content); |
| |
| doProcess(configuration, record); |
| assertLanguage(record.getId(), expectedLanguage, expectedAlternativeName); |
| } |
| |
| private void assertLanguage(final String id, final String expectedLanguage, final String expectedAlternativeName) |
| throws Exception { |
| final AnyMap anyMap = _blackboard.getMetadata(id); |
| final String identifiedLanguage = anyMap.getStringValue(LANGUAGE); |
| assertEquals(expectedLanguage, identifiedLanguage); |
| final String altName = anyMap.getStringValue(ALT_NAME); |
| assertEquals(expectedAlternativeName, altName); |
| } |
| |
| private void doProcess(final AnyMap configuration, final Record record) throws Exception { |
| final String[] recordIds = new String[] { record.getId() }; |
| record.getMetadata().put("_parameters", configuration); |
| _blackboard.setRecord(record); |
| final String[] result = _lip.process(_blackboard, recordIds); |
| assertNotNull(result); |
| assertEquals(recordIds.length, result.length); |
| } |
| |
| private AnyMap createConfiguration(final String languageAttribute, final String defaultLanguage, |
| final String alternativeNameAttribute, final String defaulAlternativeName, final boolean useCertainLanguagesOnly) { |
| final AnyMap configuration = _blackboard.getDataFactory().createAnyMap(); |
| configuration.put(LanguageIdentifyPipelet.CONTENT_ATTRIBUTE, "Text"); |
| if (languageAttribute != null) { |
| configuration.put(LanguageIdentifyPipelet.LANGUAGE_ATTRIBUTE, languageAttribute); |
| } |
| if (defaultLanguage != null) { |
| configuration.put(LanguageIdentifyPipelet.DEFAULT_LANGUAGE, defaultLanguage); |
| } |
| if (alternativeNameAttribute != null) { |
| configuration.put(LanguageIdentifyPipelet.ALTERNATIVE_NAME_ATTRIBUTE, alternativeNameAttribute); |
| } |
| if (defaulAlternativeName != null) { |
| configuration.put(LanguageIdentifyPipelet.DEFAULT_ALTERNATIVE_NAME, defaulAlternativeName); |
| } |
| if (useCertainLanguagesOnly) { |
| configuration.put(LanguageIdentifyPipelet.USE_CERTAIN_LANGUAGES_ONLY, useCertainLanguagesOnly); |
| } |
| return configuration; |
| } |
| |
| private Record createRecord(final String idValue, final String content) throws Exception { |
| final String id = "testDataSource:" + idValue; |
| final Record record = _blackboard.getRecord(id, Get.AUTO_CREATE); |
| if (content != null) { |
| final AnyMap anyMap = record.getMetadata(); |
| anyMap.put("Text", content); |
| } |
| return record; |
| } |
| } |