blob: a6964cd47f9f444f1bf9613c654466f28cf7b3cb [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
* This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
*******************************************************************************/
package org.eclipse.smila.tika.test;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.tika.TikaPipelet;
/** Test with parameter 'keepHyphens' for keeping/removing hyphens in extracted text. */
public class TestKeepHyphens extends ConverterPipelineTestBase {

  /**
   * Verifies the extracted text for the "hyphens removed" case: the first hyphen is still present, while the second
   * one (followed by a newline in the source document, per the original author's note) has been dropped so that the
   * word parts are joined. Also expects the phrase "automatische Silbentrennung" to occur in the text.
   *
   * @param result
   *          the extracted text to inspect; it is printed to stdout once all checks have passed
   */
  protected void checkHyphensRemoved(final String result) {
    final String message = "result was: " + result;
    final String[] expectedFragments = { "first-hyphen", "secondhyphen", "automatische Silbentrennung" };
    for (final String fragment : expectedFragments) {
      assertTrue(message, result.contains(fragment));
    }
    System.out.println(result);
  }

  /**
   * Verifies the extracted text for the "hyphens kept" case: both the first hyphen and the trailing hyphen of
   * "second-" must still be present.
   *
   * @param result
   *          the extracted text to inspect; it is printed to stdout once all checks have passed
   */
  protected void checkHyphensKept(final String result) {
    final String message = "result was: " + result;
    assertTrue(message, result.contains("first-hyphen"));
    assertTrue(message, result.contains("second-"));
    System.out.println(result);
  }

  /**
   * Extracts text from the given file without setting any additional parameters (hyphen removal is the default
   * according to the original author's note) and checks that hyphens were removed.
   *
   * @param fileName
   *          name of the test document handed to {@code executeTest}
   * @throws Exception
   *           if the extraction fails
   */
  protected void doTextExtractionRemoveHyphens(final String fileName) throws Exception {
    final AnyMap defaultParams = DataFactory.DEFAULT.createAnyMap();
    final String extractedText = executeTest(fileName, defaultParams);
    checkHyphensRemoved(extractedText);
  }

  /**
   * Extracts text from the given file with {@link TikaPipelet#PROP_KEEP_HYPHENS} set to "true" and checks that
   * hyphens were kept.
   *
   * @param fileName
   *          name of the test document handed to {@code executeTest}
   * @throws Exception
   *           if the extraction fails
   */
  protected void doTextExtractionKeepHyphens(final String fileName) throws Exception {
    final AnyMap keepHyphensParams = DataFactory.DEFAULT.createAnyMap();
    keepHyphensParams.put(TikaPipelet.PROP_KEEP_HYPHENS, "true");
    final String extractedText = executeTest(fileName, keepHyphensParams);
    checkHyphensKept(extractedText);
  }

  /**
   * Runs both extraction modes (remove first, then keep) against the RTF sample created from a Word 2010 document.
   *
   * @throws Exception
   *           if the extraction fails
   */
  public void testRTF() throws Exception {
    final String rtfSample = "keepHyphens/MSWORD_2010.rtf";
    doTextExtractionRemoveHyphens(rtfSample);
    doTextExtractionKeepHyphens(rtfSample);
  }

  /**
   * Runs both extraction modes (remove first, then keep) against the MS Office 2010 DOCX sample.
   *
   * @throws Exception
   *           if the extraction fails
   */
  public void testMSOFFICE2010DOCX() throws Exception {
    final String docxSample = "keepHyphens/MSWORD_2010.docx";
    doTextExtractionRemoveHyphens(docxSample);
    doTextExtractionKeepHyphens(docxSample);
  }
}