/*******************************************************************************
 * Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying
 * materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors: Thomas Menzel (brox IT Solution GmbH) - initial creator
 *******************************************************************************/
package org.eclipse.smila.solr; | |
import org.apache.solr.common.params.MoreLikeThisParams; | |
import org.eclipse.smila.datamodel.Any; | |
import org.eclipse.smila.datamodel.AnyMap; | |
import org.eclipse.smila.datamodel.AnySeq; | |
import org.eclipse.smila.datamodel.Record; | |
import org.eclipse.smila.datamodel.xml.XmlSerializationUtils; | |
import org.eclipse.smila.search.api.SearchResultConstants; | |
import org.eclipse.smila.solr.search.SolrResultAccessor; | |
/** | |
* tests adding MLT to the search and the returned record. | |
* | |
* @author tmenzel | |
*/ | |
public class SolrSearchPipelet_MoreLikeThis_Test extends SolrSearchPipeletTestBase { | |
/** | |
*/ | |
@Override | |
protected void setUp() throws Exception { | |
super.setUp(); | |
_solrField = getName() + "_t"; | |
// index docs | |
addSolrDoc("1", "text containing the word candidate that is to be highlighted once."); | |
addSolrDoc("2", "text containing the word candidate that is to be highlighted once again"); | |
addSolrDoc("3", "text not containing the word "); | |
addSolrDoc("4", "text containing the word candidate that is to be highlighted thrice. " | |
+ "Candidate 2nd time occurence. And the 3rd Candidate is not retuened due to maxFragsize"); | |
indexAndCommit(); | |
} | |
/** | |
* case to test that the result builder works properly by adding the MLT info nested into the result records. | |
*/ | |
public void test_Search_MLT_NestedInResults() throws Exception { | |
// setup search | |
_queryBuilder.setQueryFindAll(); | |
_queryBuilder.setResultAttributes(_solrField); | |
final AnyMap mltArgs = _record.getMetadata().getFactory().createAnyMap(); | |
mltArgs.put(MoreLikeThisParams.SIMILARITY_FIELDS, _solrField); | |
mltArgs.put(MoreLikeThisParams.MIN_DOC_FREQ, 1); | |
mltArgs.put(MoreLikeThisParams.MIN_TERM_FREQ, 1); | |
mltArgs.put(MoreLikeThisParams.MIN_WORD_LEN, 4); | |
_queryBuilder.setMoreLikeThis(mltArgs); | |
// exec search | |
_pipelet.process(_blackboard, new String[] { _record.getId() }); | |
_log.debug("result record: " + XmlSerializationUtils.serialize2string(_record)); | |
// verify | |
final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record); | |
final AnySeq resultRecords = results.getResultRecords(); | |
assertEquals("all reecords should be present", 4, resultRecords.size()); | |
// checking the counts is good enough for me here | |
for (Any any : resultRecords) { | |
AnyMap resultItem = (AnyMap) any; | |
// assert that the meta info is complete but not the values | |
assertNotNull(resultItem.getMap(SolrConstants.MLT_RESULT_META)); | |
final AnySeq mltResults = resultItem.getSeq(SolrConstants.MLT_RESULT_ITEMS); | |
assertNotNull(mltResults); | |
assertEquals(3, mltResults.size()); | |
for (Any any2 : mltResults) { | |
AnyMap mltResult = (AnyMap) any2; | |
assertNotNull(mltResult.getStringValue(Record.RECORD_ID)); | |
assertNotNull(mltResult.getStringValue(SearchResultConstants.WEIGHT)); | |
assertNotNull(mltResult.getStringValue(_solrField)); | |
} | |
} | |
} | |
/** | |
* just tests that the MLT results are returned as the normal result | |
*/ | |
public void test_Search_MLT_Handler_InterestingTerms_None() throws Exception { | |
// setup search | |
_queryBuilder.setQueryFindAll(); | |
_queryBuilder.setRequestHandler("/mlt"); | |
_queryBuilder.setResultAttributes(_solrField); | |
final AnyMap mltArgs = _record.getMetadata().getFactory().createAnyMap(); | |
mltArgs.put(MoreLikeThisParams.SIMILARITY_FIELDS, _solrField); | |
mltArgs.put(MoreLikeThisParams.MIN_DOC_FREQ, 1); | |
mltArgs.put(MoreLikeThisParams.MIN_TERM_FREQ, 1); | |
mltArgs.put(MoreLikeThisParams.MIN_WORD_LEN, 4); | |
_queryBuilder.setMoreLikeThis(mltArgs); | |
// exec search | |
_pipelet.process(_blackboard, new String[] { _record.getId() }); | |
_log.debug("result record: " + XmlSerializationUtils.serialize2string(_record)); | |
// verify | |
final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record); | |
final AnySeq resultRecords = results.getResultRecords(); | |
assertEquals("only 3 can be related out of 4", 3, resultRecords.size()); | |
for (int i = 0; i < results.getNumberOfRecords(); i++) { | |
final AnyMap mltResult = results.getResultRecord(i).getMetadata(); | |
// checking the counts is good enough for me here | |
assertNotNull(mltResult.getStringValue(Record.RECORD_ID)); | |
assertNotNull(mltResult.getStringValue(SearchResultConstants.WEIGHT)); | |
assertNotNull(mltResult.getStringValue(_solrField)); | |
} | |
} | |
/** | |
* just tests that the MLT results are returned as the normal result | |
*/ | |
public void test_Search_MLT_Handler_InterestingTerms_Details() throws Exception { | |
// not impl'ed yet and of little need (so far) | |
} | |
} |