/******************************************************************************* | |
* Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying | |
* materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution, | |
* and is available at http://www.eclipse.org/legal/epl-v10.html | |
* | |
* Contributors: Thomas Menzel (brox IT Solution GmbH) - initial creator | |
*******************************************************************************/ | |
package org.eclipse.smila.solr; | |
import java.util.Locale;

import org.apache.commons.lang.StringUtils;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.xml.XmlSerializationUtils;
import org.eclipse.smila.search.api.SearchResultConstants;
import org.eclipse.smila.search.api.helper.ResultItemAccessor;
import org.eclipse.smila.solr.search.SolrResultAccessor;
/** | |
* | |
* @author tmenzel | |
*/ | |
public class SolrSearchPipelet_HL_Test extends SolrSearchPipeletTestBase { | |
/** | |
*/ | |
@Override | |
protected void setUp() throws Exception { | |
super.setUp(); | |
_solrField = getName() + "_t"; | |
} | |
/** | |
* simple case to test HL, that also shows how HL works with solr. | |
*/ | |
public void test_Search_HL_Text_Simple() throws Exception { | |
// index docs | |
addSolrDoc("1", "text containing the word candidate that is to be highlighted once."); | |
addSolrDoc("2", "text containing the word candidate that is to be highlighted twice, candidate"); | |
addSolrDoc("3", "text not containing the word "); | |
addSolrDoc("4", "text containing the word candidate that is to be highlighted thrice. " | |
+ "Candidate 2nd time occurence. And the 3rd Candidate is not retuened due to maxFragsize"); | |
indexAndCommit(); | |
// setup search | |
final AnyMap highlight = DataFactory.DEFAULT.createAnyMap(); | |
highlight.put("hl.fragsize", 84); | |
_queryBuilder.addHighlightByConfig(_solrField, highlight); | |
_queryBuilder.setQuery(_solrField + ":candidate"); | |
_pipelet.process(_blackboard, new String[] { _record.getId() }); | |
_log.debug("result record: " + XmlSerializationUtils.serialize2string(_record)); | |
// verify | |
final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record); | |
final AnySeq resultRecords = results.getResultRecords(); | |
assertEquals("the 3rd record should not be found", 3, resultRecords.size()); | |
assertHitTextContains(results, 1, "<em>candidate</em>", 1); | |
assertHitTextContains(results, 2, "<em>candidate</em>", 2); | |
assertHitTextContains(results, 4, "<em>candidate</em>", 2); | |
} | |
// /** | |
// * simple case to test HL with regex. this is more an investigation test than a real test of our stuff. | |
// */ | |
// public void test_Search_HL_RegEx() throws Exception { | |
// // index docs | |
// final String sentence2 = "And a flow up sentence."; | |
// addSolrDoc("1", "This is a sentence that contains the search word. " + sentence2); | |
// indexAndCommit(); | |
// | |
// // setup search | |
// final AnyMap highlight = DataFactory.DEFAULT.createAnyMap(); | |
// highlight.put("hl.fragmenter", "regex"); | |
// _queryBuilder.addHighlightByConfig(_solrField, highlight); | |
// _queryBuilder.setQuery(_solrField + ":contains"); | |
// | |
// _pipelet.process(_blackboard, new String[] { _record.getId() }); | |
// _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record)); | |
// | |
// // verify | |
// final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record); | |
// final AnySeq resultRecords = results.getResultRecords(); | |
// assertEquals("the 3rd record should not be found", 1, resultRecords.size()); | |
// | |
// assertHitTextContains(results, 1, "<em>contains</em>", 1); | |
// // "sentence2 should be dropped by regex fragger | |
// assertHitTextContains(results, 1, sentence2, 0); | |
// } | |
// /** | |
// * investigation/regression test to mimic the old sentence HL transformer with OOB solr classes. this just tests if | |
// * there are 2 nippets returned where it is assumed that the 1st returns a merged one. | |
// */ | |
// public void test_Search_HL_Text_SentenceStyle() throws Exception { | |
// // index docs | |
// addSolrDoc("1", "Text containing 1 the word. Text containing 2 the word. " // | |
// + repeat("Some other content w/o the word. ", 10) // | |
// + "Text containing 3 the word."); | |
// indexAndCommit(); | |
// | |
// // setup search | |
// final AnyMap highlight = DataFactory.DEFAULT.createAnyMap(); | |
// highlight.put("hl.fragsize", 20); | |
// highlight.put("hl.sippets", 5); | |
// highlight.put("hl.fragmenter", "regex"); | |
// highlight.put("hl.mergeContiguous", true); | |
// _queryBuilder.addHighlightByConfig(_solrField, highlight); | |
// _queryBuilder.setQuery(_solrField + ":containing"); | |
// | |
// _pipelet.process(_blackboard, new String[] { _record.getId() }); | |
// _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record)); | |
// | |
// // verify | |
// final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record); | |
// final AnySeq resultRecords = results.getResultRecords(); | |
// assertEquals("", 1, resultRecords.size()); | |
// | |
// final ResultItemAccessor result = results.getResultRecord(0); | |
// final Any highlightingResult = | |
// result.getHighlight().getMap(_solrField).get(SearchResultConstants.HIGHLIGHT_TEXT); | |
// if (!highlightingResult.isSeq()) { | |
// fail("Should be sequence"); | |
// } else { | |
// // final AnySeq solrFieldValue = result.getMetadata().getSeq(_solrField); | |
// // assertEquals(2, solrFieldValue.size()); | |
// assertEquals(2, highlightingResult.asSeq().size()); | |
// } | |
// } | |
private void assertHitTextContains(final SolrResultAccessor results, int id, String expectedHlWordWithTags, | |
int expectedCount) { | |
final String id2 = getId(id + ""); | |
for (Any itemMap : results.getResultRecords()) { | |
final ResultItemAccessor result = new ResultItemAccessor(-1, itemMap.asMap()); | |
if (result.getRecordId().equals(id2)) { | |
final String text = | |
result.getHighlight().getMap(_solrField).getStringValue(SearchResultConstants.HIGHLIGHT_TEXT); | |
assertEquals(expectedCount, | |
StringUtils.countMatches(text.toLowerCase(), expectedHlWordWithTags.toLowerCase())); | |
return; | |
} | |
} // for | |
fail("record for id not found: " + id2); | |
} | |
} |