| /******************************************************************************* |
| * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. |
| * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 |
| * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation |
| *******************************************************************************/ |
| package org.eclipse.smila.tika.test; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.regex.Pattern; |
| |
| import org.apache.commons.io.IOUtils; |
| import org.apache.tika.exception.TikaException; |
| import org.apache.tika.io.TemporaryResources; |
| import org.apache.tika.io.TikaInputStream; |
| import org.eclipse.smila.common.mimetype.MimeTypeIdentifier; |
| import org.eclipse.smila.common.mimetype.MimeTypeParseException; |
| import org.eclipse.smila.test.DeclarativeServiceTestCase; |
| import org.eclipse.smila.tika.internal.TikaMimeTypeIdentifyService; |
| import org.eclipse.smila.utils.config.ConfigUtils; |
| |
| public class TestTikaMimeTypeIdentifier extends DeclarativeServiceTestCase { |
| |
| private static final Pattern EXTENSION_PATTERN = Pattern.compile(".*\\.([^\\.]+)$"); |
| |
| private MimeTypeIdentifier _identifier; |
| |
| @Override |
| protected void setUp() throws Exception { |
| super.setUp(); |
| _identifier = getService(MimeTypeIdentifier.class); |
| assertNotNull(_identifier); |
| assertTrue(_identifier instanceof TikaMimeTypeIdentifyService); |
| } |
| |
| /** Test openoffice 24 odp. */ |
| public void testOPENOFFICE24ODP() throws Exception { |
| final String fileName = "OpenOffice.2.4.odp"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.presentation", true); |
| } |
| |
| /** Test openoffice 24 ods. */ |
| public void testOPENOFFICE24ODS() throws Exception { |
| final String fileName = "OpenOffice.2.4.ods"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.spreadsheet", true); |
| } |
| |
| /** Test openoffice 24 odt. */ |
| public void testOPENOFFICE24ODT() throws Exception { |
| final String fileName = "OpenOffice.2.4.odt"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.text", true); |
| } |
| |
| /** Test openoffice 32 odp. */ |
| public void testOPENOFFICE32ODP() throws Exception { |
| final String fileName = "OpenOffice.3.2.odp"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.presentation", true); |
| } |
| |
| /** Test openoffice 32 ods. */ |
| public void testOPENOFFICE32ODS() throws Exception { |
| final String fileName = "OpenOffice.3.2.ods"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.spreadsheet", true); |
| } |
| |
| /** Test openoffice 32 odt. */ |
| public void testOPENOFFICE32ODT() throws Exception { |
| final String fileName = "OpenOffice.3.2.odt"; |
| executeTest(fileName, "application/vnd.oasis.opendocument.text", true); |
| } |
| |
| /** Test epub. */ |
| public void testEPUB() throws Exception { |
| final String fileName = "kamasutra.epub"; |
| executeTest(fileName, "application/epub+zip", true); |
| } |
| |
| protected void executeTest(final String fileName, final String expectedMimeType, final boolean extensionWasCorrect) |
| throws MimeTypeParseException, IOException, TikaException { |
| executeTest(fileName, Arrays.asList(expectedMimeType), extensionWasCorrect); |
| } |
| |
| protected void executeTest(final String fileName, final Collection<String> expectedMimeTypes, |
| final boolean extensionWasCorrect) throws MimeTypeParseException, IOException, TikaException { |
| final String extension = EXTENSION_PATTERN.matcher(fileName).replaceAll("$1"); |
| |
| // identify with byte[] and extension |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with byte[] and extension", |
| expectedMimeTypes.contains(_identifier.identify(IOUtils.toByteArray(inputStream), extension))); |
| } |
| |
| // identify with file content stream and extension |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with InputStream and extension", |
| expectedMimeTypes.contains(_identifier.identify(inputStream, extension))); |
| } |
| |
| // identify with TikaInputStream and extension |
| final TemporaryResources tmp = new TemporaryResources(); |
| try { |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with InputStream and extension", |
| expectedMimeTypes.contains(_identifier.identify(TikaInputStream.get(inputStream, tmp), extension))); |
| } |
| |
| // identify with extension only |
| if (extensionWasCorrect) { |
| assertTrue("Could not identify with extension only", |
| expectedMimeTypes.contains(_identifier.identify(extension))); |
| } else { |
| // must still yield no exception... |
| _identifier.identify(extension); |
| } |
| |
| // identify with file content only |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with byte[]", |
| expectedMimeTypes.contains(_identifier.identify(IOUtils.toByteArray(inputStream)))); |
| } |
| |
| // identify with file content stream only |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with InputStream", |
| expectedMimeTypes.contains(_identifier.identify(inputStream))); |
| } |
| |
| // identify with TikaInputStream only |
| try (InputStream inputStream = ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName)) { |
| assertTrue("Could not identify with InputStream", |
| expectedMimeTypes.contains(_identifier.identify(TikaInputStream.get(inputStream, tmp)))); |
| } |
| } finally { |
| tmp.dispose(); |
| } |
| } |
| } |