blob: c1eb7dbbe8e02f78b7c5695d6d929913c55a329f [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.tika.test;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.eclipse.smila.common.mimetype.MimeTypeIdentifier;
import org.eclipse.smila.common.mimetype.MimeTypeParseException;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.tika.internal.TikaMimeTypeIdentifyService;
import org.eclipse.smila.utils.config.ConfigUtils;
/**
 * Tests MIME type identification through the {@link MimeTypeIdentifier} service, which {@link #setUp()} requires to
 * be a {@link TikaMimeTypeIdentifyService}.
 *
 * <p>
 * Every test case loads a sample file via {@link ConfigUtils#getConfigStream} and runs it through all identification
 * variants exercised by {@code executeTest}: byte array, plain {@link InputStream} and {@link TikaInputStream}, each
 * with and without the file extension, plus identification by extension alone.
 */
public class TestTikaMimeTypeIdentifier extends DeclarativeServiceTestCase {

  /**
   * Matches a whole file name and captures the text after its last dot as group 1. A name without a dot does not
   * match, so {@code replaceAll("$1")} returns such a name unchanged.
   */
  private static final Pattern EXTENSION_PATTERN = Pattern.compile(".*\\.([^\\.]+)$");

  /** The service under test, looked up in {@link #setUp()}. */
  private MimeTypeIdentifier _identifier;

  /** Looks up the {@link MimeTypeIdentifier} service and checks that it is the Tika based implementation. */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _identifier = getService(MimeTypeIdentifier.class);
    assertNotNull(_identifier);
    assertTrue(_identifier instanceof TikaMimeTypeIdentifyService);
  }

  /** Identifies the sample file "OpenOffice.2.4.odp" as an OpenDocument presentation. */
  public void testOPENOFFICE24ODP() throws Exception {
    executeTest("OpenOffice.2.4.odp", "application/vnd.oasis.opendocument.presentation", true);
  }

  /** Identifies the sample file "OpenOffice.2.4.ods" as an OpenDocument spreadsheet. */
  public void testOPENOFFICE24ODS() throws Exception {
    executeTest("OpenOffice.2.4.ods", "application/vnd.oasis.opendocument.spreadsheet", true);
  }

  /** Identifies the sample file "OpenOffice.2.4.odt" as an OpenDocument text. */
  public void testOPENOFFICE24ODT() throws Exception {
    executeTest("OpenOffice.2.4.odt", "application/vnd.oasis.opendocument.text", true);
  }

  /** Identifies the sample file "OpenOffice.3.2.odp" as an OpenDocument presentation. */
  public void testOPENOFFICE32ODP() throws Exception {
    executeTest("OpenOffice.3.2.odp", "application/vnd.oasis.opendocument.presentation", true);
  }

  /** Identifies the sample file "OpenOffice.3.2.ods" as an OpenDocument spreadsheet. */
  public void testOPENOFFICE32ODS() throws Exception {
    executeTest("OpenOffice.3.2.ods", "application/vnd.oasis.opendocument.spreadsheet", true);
  }

  /** Identifies the sample file "OpenOffice.3.2.odt" as an OpenDocument text. */
  public void testOPENOFFICE32ODT() throws Exception {
    executeTest("OpenOffice.3.2.odt", "application/vnd.oasis.opendocument.text", true);
  }

  /** Identifies the sample file "kamasutra.epub" as an EPUB document. */
  public void testEPUB() throws Exception {
    executeTest("kamasutra.epub", "application/epub+zip", true);
  }

  /**
   * Runs all identification variants for a file that has exactly one acceptable MIME type.
   *
   * @param fileName
   *          name of the sample file, resolved via {@link ConfigUtils#getConfigStream}
   * @param expectedMimeType
   *          the only MIME type accepted as result
   * @param extensionWasCorrect
   *          whether identification by extension alone must yield the expected MIME type
   */
  protected void executeTest(final String fileName, final String expectedMimeType, final boolean extensionWasCorrect)
    throws MimeTypeParseException, IOException, TikaException {
    executeTest(fileName, Arrays.asList(expectedMimeType), extensionWasCorrect);
  }

  /**
   * Runs all identification variants for a file and asserts that each result is one of the expected MIME types.
   *
   * @param fileName
   *          name of the sample file, resolved via {@link ConfigUtils#getConfigStream}; the text after its last dot is
   *          used as extension
   * @param expectedMimeTypes
   *          the MIME types accepted as result
   * @param extensionWasCorrect
   *          if {@code true}, identification by extension alone must yield an expected MIME type; if {@code false}, it
   *          is only required not to throw
   */
  protected void executeTest(final String fileName, final Collection<String> expectedMimeTypes,
    final boolean extensionWasCorrect) throws MimeTypeParseException, IOException, TikaException {
    final String extension = EXTENSION_PATTERN.matcher(fileName).replaceAll("$1");

    // identify with byte[] and extension
    try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
      assertIdentified("Could not identify with byte[] and extension", expectedMimeTypes,
        _identifier.identify(IOUtils.toByteArray(inputStream), extension));
    }

    // identify with file content stream and extension
    try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
      assertIdentified("Could not identify with InputStream and extension", expectedMimeTypes,
        _identifier.identify(inputStream, extension));
    }

    // Both TikaInputStream variants below share these temporary resources; they are disposed in the finally block.
    final TemporaryResources tmp = new TemporaryResources();
    try {
      // identify with TikaInputStream and extension
      try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
        assertIdentified("Could not identify with TikaInputStream and extension", expectedMimeTypes,
          _identifier.identify(TikaInputStream.get(inputStream, tmp), extension));
      }

      // identify with extension only
      if (extensionWasCorrect) {
        assertIdentified("Could not identify with extension only", expectedMimeTypes,
          _identifier.identify(extension));
      } else {
        // must still yield no exception...
        _identifier.identify(extension);
      }

      // identify with file content only
      try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
        assertIdentified("Could not identify with byte[]", expectedMimeTypes,
          _identifier.identify(IOUtils.toByteArray(inputStream)));
      }

      // identify with file content stream only
      try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
        assertIdentified("Could not identify with InputStream", expectedMimeTypes,
          _identifier.identify(inputStream));
      }

      // identify with TikaInputStream only
      try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) {
        assertIdentified("Could not identify with TikaInputStream", expectedMimeTypes,
          _identifier.identify(TikaInputStream.get(inputStream, tmp)));
      }
    } finally {
      tmp.dispose();
    }
  }

  /**
   * Asserts that the identified MIME type is one of the expected ones. On failure the message names both the expected
   * set and the value that was actually identified.
   *
   * @param message
   *          description of the identification variant that was checked
   * @param expectedMimeTypes
   *          the MIME types accepted as result
   * @param identifiedMimeType
   *          the value returned by the identifier
   */
  private void assertIdentified(final String message, final Collection<String> expectedMimeTypes,
    final String identifiedMimeType) {
    assertTrue(message + " (expected one of " + expectedMimeTypes + ", identified: " + identifiedMimeType + ")",
      expectedMimeTypes.contains(identifiedMimeType));
  }
}