blob: bda9cbd58b82a5a2c499348ec317b4885ef477eb [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying
* materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Thomas Menzel (brox IT Solution GmbH) - initial creator
*******************************************************************************/
package org.eclipse.smila.solr;
import java.util.Locale;

import org.apache.commons.lang.StringUtils;

import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.xml.XmlSerializationUtils;
import org.eclipse.smila.search.api.SearchResultConstants;
import org.eclipse.smila.search.api.helper.ResultItemAccessor;
import org.eclipse.smila.solr.search.SolrResultAccessor;
/**
*
* @author tmenzel
*/
/**
 * Tests the highlighting (HL) behavior of the Solr search pipelet.
 *
 * @author tmenzel
 */
public class SolrSearchPipelet_HL_Test extends SolrSearchPipeletTestBase {

  /**
   * {@inheritDoc}
   *
   * Additionally derives a per-test Solr field name from the test method name. The "_t" suffix
   * presumably maps the field to a text type via a dynamic-field rule in the Solr schema — confirm
   * against the test schema.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _solrField = getName() + "_t";
  }

  /**
   * simple case to test HL, that also shows how HL works with solr.
   */
  public void test_Search_HL_Text_Simple() throws Exception {
    // index docs: doc 3 does not contain the search term, doc 4 contains it three times but the
    // third occurrence falls outside the fragment limited by hl.fragsize below.
    addSolrDoc("1", "text containing the word candidate that is to be highlighted once.");
    addSolrDoc("2", "text containing the word candidate that is to be highlighted twice, candidate");
    addSolrDoc("3", "text not containing the word ");
    addSolrDoc("4", "text containing the word candidate that is to be highlighted thrice. "
      + "Candidate 2nd time occurence. And the 3rd Candidate is not retuened due to maxFragsize");
    indexAndCommit();

    // setup search: ask for highlighting on the test field with a fragment size of 84 chars.
    final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
    highlight.put("hl.fragsize", 84);
    _queryBuilder.addHighlightByConfig(_solrField, highlight);
    _queryBuilder.setQuery(_solrField + ":candidate");

    _pipelet.process(_blackboard, new String[] { _record.getId() });
    _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));

    // verify: doc 3 must not be a hit; the others are highlighted with solr's default <em> tags.
    final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
    final AnySeq resultRecords = results.getResultRecords();
    assertEquals("the 3rd record should not be found", 3, resultRecords.size());

    assertHitTextContains(results, 1, "<em>candidate</em>", 1);
    assertHitTextContains(results, 2, "<em>candidate</em>", 2);
    // only 2 of the 3 occurrences fit into the 84-char fragment.
    assertHitTextContains(results, 4, "<em>candidate</em>", 2);
  }

  // /**
  // * simple case to test HL with regex. this is more an investigation test than a real test of our stuff.
  // */
  // public void test_Search_HL_RegEx() throws Exception {
  // // index docs
  // final String sentence2 = "And a follow-up sentence.";
  // addSolrDoc("1", "This is a sentence that contains the search word. " + sentence2);
  // indexAndCommit();
  //
  // // setup search
  // final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
  // highlight.put("hl.fragmenter", "regex");
  // _queryBuilder.addHighlightByConfig(_solrField, highlight);
  // _queryBuilder.setQuery(_solrField + ":contains");
  //
  // _pipelet.process(_blackboard, new String[] { _record.getId() });
  // _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));
  //
  // // verify
  // final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
  // final AnySeq resultRecords = results.getResultRecords();
  // assertEquals("exactly one record should be found", 1, resultRecords.size());
  //
  // assertHitTextContains(results, 1, "<em>contains</em>", 1);
  // // sentence2 should be dropped by the regex fragmenter
  // assertHitTextContains(results, 1, sentence2, 0);
  // }

  // /**
  // * investigation/regression test to mimic the old sentence HL transformer with OOB solr classes. this just tests if
  // * there are 2 snippets returned where it is assumed that the 1st returns a merged one.
  // */
  // public void test_Search_HL_Text_SentenceStyle() throws Exception {
  // // index docs
  // addSolrDoc("1", "Text containing 1 the word. Text containing 2 the word. " //
  // + repeat("Some other content w/o the word. ", 10) //
  // + "Text containing 3 the word.");
  // indexAndCommit();
  //
  // // setup search
  // final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
  // highlight.put("hl.fragsize", 20);
  // highlight.put("hl.snippets", 5);
  // highlight.put("hl.fragmenter", "regex");
  // highlight.put("hl.mergeContiguous", true);
  // _queryBuilder.addHighlightByConfig(_solrField, highlight);
  // _queryBuilder.setQuery(_solrField + ":containing");
  //
  // _pipelet.process(_blackboard, new String[] { _record.getId() });
  // _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));
  //
  // // verify
  // final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
  // final AnySeq resultRecords = results.getResultRecords();
  // assertEquals("", 1, resultRecords.size());
  //
  // final ResultItemAccessor result = results.getResultRecord(0);
  // final Any highlightingResult =
  // result.getHighlight().getMap(_solrField).get(SearchResultConstants.HIGHLIGHT_TEXT);
  // if (!highlightingResult.isSeq()) {
  // fail("Should be sequence");
  // } else {
  // // final AnySeq solrFieldValue = result.getMetadata().getSeq(_solrField);
  // // assertEquals(2, solrFieldValue.size());
  // assertEquals(2, highlightingResult.asSeq().size());
  // }
  // }

  /**
   * Asserts that the highlight text of the result record with the given doc id contains the
   * expected highlighted word exactly {@code expectedCount} times. Fails if no result record with
   * that id exists. The comparison is case-insensitive.
   *
   * @param results accessor over the search result records
   * @param id numeric part of the record id, converted via {@code getId(...)}
   * @param expectedHlWordWithTags the expected highlighted snippet including tags, e.g.
   *          {@code "<em>word</em>"}
   * @param expectedCount expected number of occurrences in the highlight text
   */
  private void assertHitTextContains(final SolrResultAccessor results, final int id,
    final String expectedHlWordWithTags, final int expectedCount) {
    final String recordId = getId(id + "");
    for (final Any itemMap : results.getResultRecords()) {
      final ResultItemAccessor result = new ResultItemAccessor(-1, itemMap.asMap());
      if (result.getRecordId().equals(recordId)) {
        final String text =
          result.getHighlight().getMap(_solrField).getStringValue(SearchResultConstants.HIGHLIGHT_TEXT);
        // Locale.ROOT makes the case-insensitive match independent of the default locale
        // (e.g. the Turkish dotless-i problem with a bare toLowerCase()).
        assertEquals(expectedCount, StringUtils.countMatches(text.toLowerCase(Locale.ROOT),
          expectedHlWordWithTags.toLowerCase(Locale.ROOT)));
        return;
      }
    } // for
    fail("record for id not found: " + recordId);
  }
}