| /*********************************************************************************************************************** |
| * Copyright (c) 2008,2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the |
| * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this |
| * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html |
| * |
| * Contributors: Andreas Schank (Attensity Europe GmbH) - initial API and implementation |
| **********************************************************************************************************************/ |
| package org.eclipse.smila.importing.compounds.simple.test; |
| |
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.compounds.CompoundExtractorException;
import org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.utils.config.ConfigUtils;
| |
| /** |
| * @author scank01 |
| * |
| */ |
| public class TestSimpleCompoundExtractorService extends DeclarativeServiceTestCase { |
| |
| private static final String BUNDLE_NAME = "org.eclipse.smila.importing.compounds.simple.test"; |
| |
| /** the compound extractor under test. */ |
| private CompoundExtractor _compoundExtractor; |
| |
| /** {@inheritDoc} */ |
| @Override |
| protected void setUp() throws Exception { |
| super.setUp(); |
| _compoundExtractor = getService(CompoundExtractor.class); |
| assertNotNull(_compoundExtractor); |
| assertTrue(_compoundExtractor instanceof SimpleCompoundExtractorService); |
| } |
| |
| /** {@inheritDoc} */ |
| @Override |
| protected void tearDown() throws Exception { |
| super.tearDown(); |
| } |
| |
| /** |
| * Test method for |
| * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#canExtract(java.io.File)}. |
| */ |
| public void testCanExtractFile() { |
| assertFalse(_compoundExtractor.canExtract(null)); |
| assertFalse(_compoundExtractor.canExtract(new File("null"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.zip"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.ZIP"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.zIp"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.gz"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.GZ"))); |
| assertTrue(_compoundExtractor.canExtract(new File("a.gZ"))); |
| } |
| |
| /** |
| * Test method for |
| * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#canExtract(java.net.URL, java.lang.String)} |
| * . |
| * |
| * @throws MalformedURLException |
| */ |
| public void testCanExtractURLString() throws MalformedURLException { |
| assertFalse(_compoundExtractor.canExtract((URL) null, null)); |
| assertFalse(_compoundExtractor.canExtract((String) null, null)); |
| assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), null)); |
| assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gunzip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gzip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/zip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/x-gunzip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.GZ"), "application/x-gzip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "application/zip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/zip")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/octet-stream")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/octet-stream")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "")); |
| assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), null)); |
| assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/pdf")); |
| assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "text/plain")); |
| } |
| |
| /** |
| * Test method for |
| * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)}. |
| * |
| * @throws CompoundExtractorException |
| */ |
| public void testExtractGzipInZip() throws CompoundExtractorException { |
| doTestTestZipEntries("test.zip"); |
| } |
| |
| /** |
| * Test method for |
| * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)} |
| * using a zip created by WinZip. |
| * |
| * @throws CompoundExtractorException |
| */ |
| public void testExtractGzipInWinzipZip() throws CompoundExtractorException { |
| doTestTestZipEntries("test-winzip.zip"); |
| } |
| |
| /** test the test.zip variants. */ |
| private void doTestTestZipEntries(final String baseZipName) throws CompoundExtractorException { |
| final Collection<String> compounds = |
| Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz"); |
| final Collection<String> files = |
| Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz", baseZipName |
| + "/doc.txt.gz/doc.txt", baseZipName + "/doc2.txt", baseZipName + "/\u00e4/\u00fc.txt", baseZipName |
| + "/\u00e4/\u00b3zip.zip/\u00b3zip/\u00fc.txt"); |
| final Collection<String> internalFileNames = |
| Arrays.asList(baseZipName, "\u00e4/\u00b3zip.zip", "doc.txt.gz", "doc.txt", "doc2.txt", "\u00e4/\u00fc.txt", |
| "\u00b3zip/\u00fc.txt"); |
| final Iterator<Record> iter = |
| _compoundExtractor |
| .extract(ConfigUtils.getConfigStream(BUNDLE_NAME, baseZipName), baseZipName, "test-content"); |
| final Collection<String> foundFiles = new HashSet<String>(); |
| while (iter.hasNext()) { |
| final Record record = iter.next(); |
| final String fileName = record.getId(); |
| foundFiles.add(fileName); |
| assertTrue(fileName + " not expected.", files.contains(fileName)); |
| assertTrue(fileName + " not correct.", |
| fileName.endsWith(record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME))); |
| final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME); |
| assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName)); |
| if (fileName.equals(baseZipName)) { |
| assertTrue(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD)); |
| assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD)); |
| } else { |
| assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD)); |
| } |
| if (compounds.contains(fileName)) { |
| assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND)); |
| assertFalse(record.hasAttachments()); |
| } else { |
| assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND)); |
| assertTrue(record.hasAttachment("test-content")); |
| } |
| AnySeq compoundsSeq = DataFactory.DEFAULT.createAnySeq(); |
| if (record.getMetadata().containsKey(CompoundExtractor.KEY_COMPOUNDS)) { |
| compoundsSeq = record.getMetadata().getSeq(CompoundExtractor.KEY_COMPOUNDS); |
| } |
| String computedId = ""; |
| for (final Any compound : compoundsSeq) { |
| computedId += compound.asValue().asString() + "/"; |
| } |
| computedId += record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME); |
| assertEquals("ids do not match.", computedId, record.getId()); |
| } |
| assertEquals(files.size(), foundFiles.size()); |
| } |
| |
| /** |
| * Test method for |
| * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)}. |
| * |
| * @throws CompoundExtractorException |
| */ |
| public void testExtractGzipInZipInGzip() throws CompoundExtractorException { |
| final Collection<String> compounds = |
| Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz"); |
| final Collection<String> files = |
| Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz", "1.zip.gz/1.zip/1/2/3/4.txt", |
| "1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt"); |
| final Collection<String> internalFileNames = |
| Arrays.asList("1.zip.gz", "1.zip", "1/2/3/4.txt.gz", "1/2/3/4.txt", "4.txt"); |
| final Map<String, Long> sizes = new HashMap<String, Long>(); |
| sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz", 68L); |
| sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt", 1018L); |
| sizes.put("1.zip.gz/1.zip/1/2/3/4.txt", 1018L); |
| sizes.put("1.zip.gz/1.zip", 768L); |
| final Iterator<Record> iter = |
| _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "1.zip.gz"), "1.zip.gz", "test-content"); |
| checkResultingRecords(compounds, files, internalFileNames, sizes, iter); |
| } |
| |
| /** |
| * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file created |
| * with linux. |
| * |
| * @throws CompoundExtractorException |
| */ |
| public void testLinuxZip() throws CompoundExtractorException { |
| final Collection<String> compounds = Arrays.asList("\u00e4_linux.zip"); |
| final Collection<String> files = |
| Arrays.asList("\u00e4_linux.zip", "\u00e4_linux.zip/\u00e4/\u00b3.txt", "\u00e4_linux.zip/\u00e4/\u00e4.txt"); |
| final Collection<String> internalFileNames = |
| Arrays.asList("\u00e4_linux.zip", "\u00e4/\u00b3.txt", "\u00e4/\u00e4.txt"); |
| final Map<String, Long> sizes = new HashMap<String, Long>(); |
| sizes.put("\u00e4_linux.zip/\u00e4/\u00b3.txt", 11L); |
| sizes.put("\u00e4_linux.zip/\u00e4/\u00e4.txt", 11L); |
| final Iterator<Record> iter = |
| _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "\u00e4_linux.zip"), "\u00e4_linux.zip", |
| "test-content"); |
| checkResultingRecords(compounds, files, internalFileNames, sizes, iter); |
| } |
| |
| /** |
| * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a gz file. |
| * |
| * @throws CompoundExtractorException |
| */ |
| public void testGz() throws CompoundExtractorException { |
| final Collection<String> compounds = Arrays.asList("doc.txt.gz"); |
| final Collection<String> files = Arrays.asList("doc.txt.gz", "doc.txt.gz/doc.txt"); |
| final Collection<String> internalFileNames = Arrays.asList("doc.txt.gz", "doc.txt"); |
| final Map<String, Long> sizes = new HashMap<String, Long>(); |
| sizes.put("doc.txt.gz/doc.txt", 16L); |
| final Iterator<Record> iter = |
| _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "doc.txt.gz"), "doc.txt.gz", |
| "test-content"); |
| checkResultingRecords(compounds, files, internalFileNames, sizes, iter); |
| } |
| |
| /** check the records. */ |
| private void checkResultingRecords(final Collection<String> compounds, final Collection<String> files, |
| final Collection<String> internalFileNames, final Map<String, Long> sizes, final Iterator<Record> iter) { |
| final Collection<String> foundFiles = new HashSet<String>(); |
| while (iter.hasNext()) { |
| final Record record = iter.next(); |
| final String fileName = record.getId(); |
| foundFiles.add(fileName); |
| assertTrue(fileName + " not expected.", files.contains(fileName)); |
| assertEquals("Uncompressed size does not match for " + fileName, sizes.get(fileName), record.getMetadata() |
| .getLongValue(CompoundExtractor.KEY_SIZE)); |
| final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME); |
| assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName)); |
| if (compounds.contains(fileName)) { |
| assertTrue("CompoundExtractor.KEY_IS_COMPOUND not present", |
| record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND)); |
| assertTrue("CompoundExtractor.KEY_IS_COMPOUND not set to true", |
| record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND)); |
| assertFalse(record.hasAttachments()); |
| } else { |
| assertFalse(record.getId() + " has compound flag", |
| record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND)); |
| assertTrue(record.hasAttachment("test-content")); |
| } |
| AnySeq compoundsSeq = DataFactory.DEFAULT.createAnySeq(); |
| if (record.getMetadata().containsKey(CompoundExtractor.KEY_COMPOUNDS)) { |
| compoundsSeq = record.getMetadata().getSeq(CompoundExtractor.KEY_COMPOUNDS); |
| } |
| String computedId = ""; |
| for (final Any compound : compoundsSeq) { |
| computedId += compound.asValue().asString() + "/"; |
| } |
| computedId += record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME); |
| assertEquals("ids do not match.", computedId, record.getId()); |
| } |
| assertEquals(files.size(), foundFiles.size()); |
| } |
| |
| } |