| /******************************************************************************* |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. |
| * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.smila.tika.test; |
| |
| import java.util.Map; |
| |
| import org.eclipse.smila.datamodel.Any; |
| import org.eclipse.smila.datamodel.AnyMap; |
| import org.eclipse.smila.datamodel.AnySeq; |
| import org.eclipse.smila.datamodel.DataFactory; |
| import org.eclipse.smila.tika.TikaPipelet; |
| |
| /** test for extracting metadata. */ |
| public class TestExtractProperties extends ConverterPipelineTestBase { |
| |
| protected AnyMap _expectedMetadata; |
| |
| /** Tests for Open Office metadata extraction. */ |
| public void testOpenOfficeProps() throws Exception { |
| final String fileName = "OpenOffice.3.2.odp"; |
| final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap(); |
| final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq(); |
| additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties); |
| |
| final AnyMap author = DataFactory.DEFAULT.createAnyMap(); |
| author.put("metadataName", "creator"); |
| author.put("targetAttribute", "ATT_Creator"); |
| extractProperties.add(author); |
| |
| final AnyMap language = DataFactory.DEFAULT.createAnyMap(); |
| language.put("metadataName", "language"); |
| language.put("targetAttribute", "ATT_Language"); |
| extractProperties.add(language); |
| |
| final AnyMap title = DataFactory.DEFAULT.createAnyMap(); |
| title.put("metadataName", "title"); |
| title.put("targetAttribute", "ATT_Title"); |
| extractProperties.add(title); |
| |
| _expectedMetadata.put("ATT_Creator", "Juergen Schuhmacher"); |
| _expectedMetadata.put("ATT_Language", "en-US"); |
| _expectedMetadata.put("ATT_Title", "Introducing a New Product"); |
| |
| executeTest(fileName, additionalRecordParams); |
| } |
| |
| public void testMsOfficeSimple() throws Exception { |
| final String fileName = "MSWORD_2010.docx"; |
| final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap(); |
| final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq(); |
| additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties); |
| |
| final AnyMap title = DataFactory.DEFAULT.createAnyMap(); |
| title.put("metadataName", "title"); |
| title.put("targetAttribute", "ATT_Titel"); |
| extractProperties.add(title); |
| _expectedMetadata.put("ATT_Titel", "Mein Titel"); |
| executeTest(fileName, additionalRecordParams); |
| } |
| |
| /** Tests for MS Office specific properties. */ |
| public void testMsOfficeProps() throws Exception { |
| final String fileName = "MSWORD_2010.docx"; |
| final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap(); |
| final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq(); |
| additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties); |
| |
| final AnyMap author = DataFactory.DEFAULT.createAnyMap(); |
| author.put("metadataName", "Author"); |
| author.put("targetAttribute", "ATT_Author"); |
| extractProperties.add(author); |
| |
| final AnyMap category = DataFactory.DEFAULT.createAnyMap(); |
| category.put("metadataName", "Category"); |
| category.put("targetAttribute", "ATT_Category"); |
| extractProperties.add(category); |
| |
| final AnyMap status = DataFactory.DEFAULT.createAnyMap(); |
| status.put("metadataName", "Content-Status"); |
| status.put("targetAttribute", "ATT_Status"); |
| extractProperties.add(status); |
| |
| final AnyMap manager = DataFactory.DEFAULT.createAnyMap(); |
| manager.put("metadataName", "Manager"); |
| manager.put("targetAttribute", "ATT_Manager"); |
| extractProperties.add(manager); |
| |
| _expectedMetadata.put("ATT_Author", "Andreas;Homer"); |
| _expectedMetadata.put("ATT_Category", "Meine Kategorie"); |
| _expectedMetadata.put("ATT_Status", "Mein Status"); |
| _expectedMetadata.put("ATT_Manager", "Mein Manager"); |
| |
| executeTest(fileName, additionalRecordParams); |
| } |
| |
| /** Tests for parameter 'storeMode'. */ |
| public void testStoreMode() throws Exception { |
| final String fileName = "MSWORD_2010.docx"; |
| final String testParameter = "storeMode"; |
| final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap(); |
| final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq(); |
| additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties); |
| |
| final AnyMap manager = DataFactory.DEFAULT.createAnyMap(); |
| manager.put("metadataName", "Manager"); |
| manager.put("targetAttribute", FILENAME_ATTRIBUTE); // we use an attribute that already has a value |
| extractProperties.add(manager); |
| |
| // add |
| manager.put(testParameter, TikaPipelet.StoreMode.add.name()); |
| _expectedMetadata.add(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("MSWORD_2010.docx")); |
| _expectedMetadata.add(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("Mein Manager")); |
| executeTest(fileName, additionalRecordParams); |
| |
| // leave |
| manager.put(testParameter, TikaPipelet.StoreMode.leave.name()); |
| _expectedMetadata.put(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("MSWORD_2010.docx")); |
| executeTest(fileName, additionalRecordParams); |
| |
| // overwrite |
| manager.put(testParameter, TikaPipelet.StoreMode.overwrite.name()); |
| _expectedMetadata.put(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("Mein Manager")); |
| executeTest(fileName, additionalRecordParams); |
| |
| // default: add |
| manager.remove(testParameter); |
| _expectedMetadata.put(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createAnySeq()); |
| _expectedMetadata.add(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("MSWORD_2010.docx")); |
| _expectedMetadata.add(FILENAME_ATTRIBUTE, DataFactory.DEFAULT.createStringValue("Mein Manager")); |
| executeTest(fileName, additionalRecordParams); |
| } |
| |
| @Override |
| protected void setUp() throws Exception { |
| super.setUp(); |
| _expectedMetadata = DataFactory.DEFAULT.createAnyMap(); |
| } |
| |
| @Override |
| protected void assertResultMetadata(final AnyMap metadata) { |
| for (final Map.Entry<String, Any> expected : _expectedMetadata.entrySet()) { |
| assertTrue("Expected attribute: " + expected.getKey(), metadata.containsKey(expected.getKey())); |
| assertEquals(expected.getValue(), metadata.get(expected.getKey())); |
| } |
| } |
| } |