blob: dc85045caf836e740d6e82b7198edc1bcecb31be [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.tika.test;
import java.util.Map;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.tika.TikaPipelet;
/** test for extracting metadata. */
public class TestExtractPropertiesFromMediaFiles extends ConverterPipelineTestBase {
protected final AnyMap _expectedMetadata = DataFactory.DEFAULT.createAnyMap();
public void testEpubMetadata() throws Exception {
final String fileName = "kamasutra.epub";
final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap();
final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq();
additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties);
final AnyMap author = DataFactory.DEFAULT.createAnyMap();
author.put("metadataName", "Author");
author.put("targetAttribute", "ATT_Author");
extractProperties.add(author);
final AnyMap title = DataFactory.DEFAULT.createAnyMap();
title.put("metadataName", "title");
title.put("targetAttribute", "ATT_Title");
extractProperties.add(title);
final AnyMap subject = DataFactory.DEFAULT.createAnyMap();
subject.put("metadataName", "subject");
subject.put("targetAttribute", "ATT_Subject");
extractProperties.add(subject);
_expectedMetadata.put("ATT_Author", "Vatsyayana");
_expectedMetadata
.put(
"ATT_Title",
"The Kama Sutra of Vatsyayana / Translated From the Sanscrit in Seven Parts With Preface, Introduction and Concluding Remarks");
_expectedMetadata.getSeq("ATT_Subject", true).add("Love");
_expectedMetadata.getSeq("ATT_Subject", true).add("Sex");
final String text = executeTest(fileName, additionalRecordParams);
assertTrue(text.contains("The Project Gutenberg EBook of The Kama Sutra of Vatsyayana, by Vatsyayana"));
}
public void testMp3Metadata() throws Exception {
final String fileName = "13-Untitled.mp3";
final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap();
final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq();
additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties);
final AnyMap author = DataFactory.DEFAULT.createAnyMap();
author.put("metadataName", "Author");
author.put("targetAttribute", "ATT_Author");
extractProperties.add(author);
final AnyMap title = DataFactory.DEFAULT.createAnyMap();
title.put("metadataName", "title");
title.put("targetAttribute", "ATT_Titel");
extractProperties.add(title);
final AnyMap album = DataFactory.DEFAULT.createAnyMap();
album.put("metadataName", "xmpDM:album");
album.put("targetAttribute", "ATT_Album");
extractProperties.add(album);
_expectedMetadata.put("ATT_Author", "Fant\u00f4mas");
_expectedMetadata.put("ATT_Titel", "Untitled");
_expectedMetadata.put("ATT_Album", "The Director's Cut");
executeTest(fileName, additionalRecordParams);
}
public void testJpgMetadata() throws Exception {
final String fileName = "portrait.jpg";
final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap();
final AnySeq extractProperties = DataFactory.DEFAULT.createAnySeq();
additionalRecordParams.put(TikaPipelet.PROP_EXTRACT_PROPERTIES, extractProperties);
final AnyMap height = DataFactory.DEFAULT.createAnyMap();
height.put("metadataName", "tiff:ImageLength");
height.put("targetAttribute", "ATT_Height");
extractProperties.add(height);
final AnyMap width = DataFactory.DEFAULT.createAnyMap();
width.put("metadataName", "tiff:ImageWidth");
width.put("targetAttribute", "ATT_Width");
extractProperties.add(width);
final AnyMap title = DataFactory.DEFAULT.createAnyMap();
title.put("metadataName", "tiff:Make");
title.put("targetAttribute", "ATT_Make");
extractProperties.add(title);
final AnyMap album = DataFactory.DEFAULT.createAnyMap();
album.put("metadataName", "tiff:Model");
album.put("targetAttribute", "ATT_Model");
extractProperties.add(album);
_expectedMetadata.put("ATT_Height", "100");
_expectedMetadata.put("ATT_Width", "78");
_expectedMetadata.put("ATT_Make", "NIKON CORPORATION");
_expectedMetadata.put("ATT_Model", "NIKON D3100");
executeTest(fileName, additionalRecordParams);
}
@Override
protected void assertResultMetadata(final AnyMap metadata) {
for (final Map.Entry<String, Any> expected : _expectedMetadata.entrySet()) {
assertTrue("Expected attribute: " + expected.getKey(), metadata.containsKey(expected.getKey()));
assertEquals(expected.getValue(), metadata.get(expected.getKey()));
}
}
}