blob: bda9cbd58b82a5a2c499348ec317b4885ef477eb [file] [log] [blame]
/*******************************************************************************
* Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying
* materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: Thomas Menzel (brox IT Solution GmbH) - initial creator
*******************************************************************************/
package org.eclipse.smila.solr;
import java.util.Locale;

import org.apache.commons.lang.StringUtils;

import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.xml.XmlSerializationUtils;
import org.eclipse.smila.search.api.SearchResultConstants;
import org.eclipse.smila.search.api.helper.ResultItemAccessor;
import org.eclipse.smila.solr.search.SolrResultAccessor;
/**
*
* @author tmenzel
*/
/**
 * Tests the highlighting (HL) behavior of the Solr search pipelet.
 *
 * @author tmenzel
 */
public class SolrSearchPipelet_HL_Test extends SolrSearchPipeletTestBase {

  /**
   * {@inheritDoc}
   *
   * Additionally derives a per-test Solr field name from the test method name. The "_t" suffix
   * presumably maps the field to a text type via a dynamic-field rule in the Solr schema — confirm
   * against the test schema.
   */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _solrField = getName() + "_t";
  }

  /**
   * simple case to test HL, that also shows how HL works with solr.
   */
  public void test_Search_HL_Text_Simple() throws Exception {
    // index docs: doc 3 does not contain the search term, doc 4 contains it three times but the
    // third occurrence falls outside the fragment limited by hl.fragsize below.
    addSolrDoc("1", "text containing the word candidate that is to be highlighted once.");
    addSolrDoc("2", "text containing the word candidate that is to be highlighted twice, candidate");
    addSolrDoc("3", "text not containing the word ");
    addSolrDoc("4", "text containing the word candidate that is to be highlighted thrice. "
      + "Candidate 2nd time occurence. And the 3rd Candidate is not retuened due to maxFragsize");
    indexAndCommit();

    // setup search: ask for highlighting on the test field with a fragment size of 84 chars.
    final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
    highlight.put("hl.fragsize", 84);
    _queryBuilder.addHighlightByConfig(_solrField, highlight);
    _queryBuilder.setQuery(_solrField + ":candidate");

    _pipelet.process(_blackboard, new String[] { _record.getId() });
    _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));

    // verify: doc 3 must not be a hit; the others are highlighted with solr's default <em> tags.
    final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
    final AnySeq resultRecords = results.getResultRecords();
    assertEquals("the 3rd record should not be found", 3, resultRecords.size());

    assertHitTextContains(results, 1, "<em>candidate</em>", 1);
    assertHitTextContains(results, 2, "<em>candidate</em>", 2);
    // only 2 of the 3 occurrences fit into the 84-char fragment.
    assertHitTextContains(results, 4, "<em>candidate</em>", 2);
  }

  // /**
  // * simple case to test HL with regex. this is more an investigation test than a real test of our stuff.
  // */
  // public void test_Search_HL_RegEx() throws Exception {
  // // index docs
  // final String sentence2 = "And a follow-up sentence.";
  // addSolrDoc("1", "This is a sentence that contains the search word. " + sentence2);
  // indexAndCommit();
  //
  // // setup search
  // final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
  // highlight.put("hl.fragmenter", "regex");
  // _queryBuilder.addHighlightByConfig(_solrField, highlight);
  // _queryBuilder.setQuery(_solrField + ":contains");
  //
  // _pipelet.process(_blackboard, new String[] { _record.getId() });
  // _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));
  //
  // // verify
  // final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
  // final AnySeq resultRecords = results.getResultRecords();
  // assertEquals("exactly one record should be found", 1, resultRecords.size());
  //
  // assertHitTextContains(results, 1, "<em>contains</em>", 1);
  // // sentence2 should be dropped by the regex fragmenter
  // assertHitTextContains(results, 1, sentence2, 0);
  // }

  // /**
  // * investigation/regression test to mimic the old sentence HL transformer with OOB solr classes. this just tests if
  // * there are 2 snippets returned where it is assumed that the 1st returns a merged one.
  // */
  // public void test_Search_HL_Text_SentenceStyle() throws Exception {
  // // index docs
  // addSolrDoc("1", "Text containing 1 the word. Text containing 2 the word. " //
  // + repeat("Some other content w/o the word. ", 10) //
  // + "Text containing 3 the word.");
  // indexAndCommit();
  //
  // // setup search
  // final AnyMap highlight = DataFactory.DEFAULT.createAnyMap();
  // highlight.put("hl.fragsize", 20);
  // highlight.put("hl.snippets", 5);
  // highlight.put("hl.fragmenter", "regex");
  // highlight.put("hl.mergeContiguous", true);
  // _queryBuilder.addHighlightByConfig(_solrField, highlight);
  // _queryBuilder.setQuery(_solrField + ":containing");
  //
  // _pipelet.process(_blackboard, new String[] { _record.getId() });
  // _log.debug("result record: " + XmlSerializationUtils.serialize2string(_record));
  //
  // // verify
  // final SolrResultAccessor results = new SolrResultAccessor(WORKFLOW, _record);
  // final AnySeq resultRecords = results.getResultRecords();
  // assertEquals("", 1, resultRecords.size());
  //
  // final ResultItemAccessor result = results.getResultRecord(0);
  // final Any highlightingResult =
  // result.getHighlight().getMap(_solrField).get(SearchResultConstants.HIGHLIGHT_TEXT);
  // if (!highlightingResult.isSeq()) {
  // fail("Should be sequence");
  // } else {
  // // final AnySeq solrFieldValue = result.getMetadata().getSeq(_solrField);
  // // assertEquals(2, solrFieldValue.size());
  // assertEquals(2, highlightingResult.asSeq().size());
  // }
  // }

  /**
   * Asserts that the highlight text of the result record with the given doc id contains the
   * expected highlighted word exactly {@code expectedCount} times. Fails if no result record with
   * that id exists. The comparison is case-insensitive.
   *
   * @param results accessor over the search result records
   * @param id numeric part of the record id, converted via {@code getId(...)}
   * @param expectedHlWordWithTags the expected highlighted snippet including tags, e.g.
   *          {@code "<em>word</em>"}
   * @param expectedCount expected number of occurrences in the highlight text
   */
  private void assertHitTextContains(final SolrResultAccessor results, final int id,
    final String expectedHlWordWithTags, final int expectedCount) {
    final String recordId = getId(id + "");
    for (final Any itemMap : results.getResultRecords()) {
      final ResultItemAccessor result = new ResultItemAccessor(-1, itemMap.asMap());
      if (result.getRecordId().equals(recordId)) {
        final String text =
          result.getHighlight().getMap(_solrField).getStringValue(SearchResultConstants.HIGHLIGHT_TEXT);
        // Locale.ROOT makes the case-insensitive match independent of the default locale
        // (e.g. the Turkish dotless-i problem with a bare toLowerCase()).
        assertEquals(expectedCount, StringUtils.countMatches(text.toLowerCase(Locale.ROOT),
          expectedHlWordWithTags.toLowerCase(Locale.ROOT)));
        return;
      }
    } // for
    fail("record for id not found: " + recordId);
  }
}