// blob: cc33fde50d2b21152ed2670b45a2a0b26dba3c41 (source-viewer header, not part of the Java source)
/***********************************************************************************************************************
* Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying
* materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: August Georg Schmidt (brox IT Solutions GmbH) - initial API and implementation
**********************************************************************************************************************/
package org.eclipse.smila.search;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Enumeration;
import java.util.Formatter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.search.exceptions.AFException;
import org.eclipse.smila.search.index.IndexConnection;
import org.eclipse.smila.search.index.IndexException;
import org.eclipse.smila.search.index.IndexManager;
import org.eclipse.smila.search.plugin.Plugin;
import org.eclipse.smila.search.plugin.PluginFactory;
import org.eclipse.smila.search.search.tools.advsearch.AdvSearchException;
import org.eclipse.smila.search.search.tools.advsearch.IAdvSearch;
import org.eclipse.smila.search.search.tools.advsearch.IQueryExpression;
import org.eclipse.smila.search.search.tools.search.DAnyFinderSearch;
import org.eclipse.smila.search.search.tools.search.DAnyFinderSearchCodec;
import org.eclipse.smila.search.search.tools.search.DQuery;
import org.eclipse.smila.search.search.tools.search.DSearchException;
import org.eclipse.smila.search.search.tools.searchresult.DAnyFinderSearchResult;
import org.eclipse.smila.search.search.tools.searchresult.DAnyFinderSearchResultCodec;
import org.eclipse.smila.search.search.tools.searchresult.DHit;
import org.eclipse.smila.search.search.tools.searchresult.DHitDistribution;
import org.eclipse.smila.search.search.tools.searchresult.DItem;
import org.eclipse.smila.search.search.tools.searchresult.DResult;
import org.eclipse.smila.search.search.tools.searchresult.DSearchResultException;
import org.eclipse.smila.search.tools.errormessage.DErrorMessage;
import org.eclipse.smila.tools.XMLUtils;
import org.eclipse.smila.tools.XMLUtilsConfig;
import org.eclipse.smila.tools.XMLUtilsException;
import org.eclipse.smila.tools.cache.CacheException;
import org.eclipse.smila.tools.cache.CacheManager;
import org.eclipse.smila.tools.cache.CacheManagerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
* Search Class.
*
* @author August Georg Schmidt (BROX)
* @version 1.0
*/
public abstract class AFSearch {
static {
EIFActivator.registerSchemas();
}
/**
* Hidden constructor. Class must be used via static methods.
*/
private AFSearch() {
}
/**
* takes the given bytearray which must rsult to a valid AnyFinderSearch- or AnyFinderAdvancedSearch-XML and performs
* a search for it.
*
* @param anyFinderAnySearchStream
* Search query as byte[].
* @return Search result.
* @throws AFException
* Exception during search.
*/
public static DAnyFinderSearchResult search(byte[] anyFinderAnySearchStream) throws AFException {
final Log log = LogFactory.getLog(AFSearch.class);
DAnyFinderSearchResult dSR = null;
try {
// get anyfinder search
final Document doc = XMLUtils.parse(anyFinderAnySearchStream, new XMLUtilsConfig());
final Element root = doc.getDocumentElement();
if ("AnyFinderSearch".equals(root.getLocalName())) {
dSR = AFSearch.search(DAnyFinderSearchCodec.decode(root));
} else if ("AnyFinderAdvancedSearch".equals(root.getLocalName())) {
final Plugin plugin = PluginFactory.getPlugin();
dSR = AFSearch.search(plugin.getAdvSearchAccess().decode(root));
} else {
dSR = new DAnyFinderSearchResult();
final DErrorMessage dErrorMessage = new DErrorMessage();
dErrorMessage.setCode("AF-ERR");
dErrorMessage.setMessage("AnyFinder Error");
dErrorMessage.setDetail("Given ByteArray is neither a AnyFinderSearch nor an AnyFinderAdvancedSearch");
dErrorMessage.setSource(AFSearch.class.getName());
dSR.setErrorMessage(dErrorMessage);
log.error(dErrorMessage.getDetail());
}
} catch (final XMLUtilsException e) {
log.error(e.getMessage());
throw new AFException("Unable to parse given xml stream", e);
} catch (final AdvSearchException e) {
throw new AFException("Unable to decode AdvancedSearch", e);
} catch (final DSearchException e) {
throw new AFException("Unable to decode search: " + e.getMessage());
} catch (final ClassCastException e) {
throw new AFException("Error during class conversion", e);
}
// exceute search
return dSR;
}
/**
* Execute search for a <code>DAnyFinderSearch</code> object.
*
* @param dSimpleSearch
* Search to execute.
* @return Search result.
*/
public static DAnyFinderSearchResult search(DAnyFinderSearch dSimpleSearch) {
final Log log = LogFactory.getLog(AFSearch.class);
final DAnyFinderSearchResult dAnyFinderSearchResult = new DAnyFinderSearchResult();
final Enumeration queries = dSimpleSearch.getQueries();
if (queries != null) {
while (queries.hasMoreElements()) {
DQuery dQuery = null;
long start = 0;
try {
dQuery = (DQuery) queries.nextElement();
String indexName = dQuery.getIndexName();
if (indexName != null) {
indexName = indexName.trim();
} else {
throw new IndexException("index name is not specified in query");
}
if ("".equals(indexName)) {
throw new IndexException("index name is empty in query");
}
final CacheManager<String, DResult> cache = CacheManagerFactory.getInstance();
start = System.currentTimeMillis();
if (indexName.indexOf(';') == -1) {
final String sQuery = dQuery.toString();
final String queryHash = getMD5Hash(sQuery);
final String queryName = byteArrayToString(sQuery.getBytes());
final String qualifiedNameInCache = "/" + dQuery.getIndexName() + "/" + queryHash;
DResult dResult = null;
if (cache.exists(qualifiedNameInCache, queryName)) {
dResult = cache.get(qualifiedNameInCache, queryName);
if (dResult != null) {
dAnyFinderSearchResult.addResult(dResult);
}
} else {
// get Pool, do query, and release it
boolean killPoolConnection = false;
IndexConnection indexConnection = null;
try {
indexConnection = IndexManager.getInstance(indexName);
if (log.isInfoEnabled()) {
log.info("Time to get instance: " + (System.currentTimeMillis() - start) + "[ms]");
}
// TODO:
dResult = null; //indexConnection.doQuery(dQuery);
if (dResult != null) {
dAnyFinderSearchResult.addResult(dResult);
}
if ((dResult != null) && (dResult.getErrorMessage() == null)) {
cache.put(qualifiedNameInCache, queryName, dResult);
}
} catch (final IndexException e) {
killPoolConnection = true;
throw e;
} finally {
if (indexConnection != null) {
IndexManager.releaseInstance(indexConnection, killPoolConnection);
if (log.isDebugEnabled()) {
log.debug("Time to Release: " + (System.currentTimeMillis() - start) + "[ms]");
}
}
}
}
} else {
searchOnMultipleIndizes(indexName, dQuery, dAnyFinderSearchResult, start);
}
if (log.isInfoEnabled()) {
log.info("Time to Query result: " + (System.currentTimeMillis() - start) + "[ms]");
}
} catch (final IndexException e) {
createErrorMessage(dAnyFinderSearchResult, dQuery.getIndexName(), e);
} catch (final Exception e) {
createErrorMessage(dAnyFinderSearchResult, dQuery.getIndexName(), e);
} // try
} // while
} // if
return dAnyFinderSearchResult;
}
/**
* Create a MD5 Hash from a String.
*
* @param in
* String to create MD5 hash from.
* @return MD5 Hash as String.
*/
private static String getMD5Hash(String in) {
final Log log = LogFactory.getLog(AFSearch.class);
final StringBuffer result = new StringBuffer(32);
try {
final MessageDigest md5 = MessageDigest.getInstance("MD5");
md5.update(in.getBytes());
final Formatter f = new Formatter(result);
for (final byte b : md5.digest()) {
f.format("%02x", b);
}
} catch (final NoSuchAlgorithmException ex) {
if (log.isErrorEnabled()) {
log.error(ex);
}
}
return result.toString();
}
/**
* Create a String from a byte[].
*
* @param bytes
* Byte[] for string conversion.
* @return Byte[] as String.
*/
private static String byteArrayToString(byte[] bytes) {
final StringBuffer result = new StringBuffer(32);
final Formatter f = new Formatter(result);
for (final byte b : bytes) {
f.format("%02x", b);
}
return result.toString();
}
/**
* Search on multiple indices with extended search result merging and caching.
*
* @param indexName
* Index names for search query. (';' is delemiter)
* @param dQuery
* Search query to execute.
* @param dAnyFinderSearchResult
* Search result.
* @param startTime
* Start time for logging.
* @throws IndexException
* Search problems.
* @throws CacheException
* Exception during cache access.
*/
private static void searchOnMultipleIndizes(String indexName, DQuery dQuery,
final DAnyFinderSearchResult dAnyFinderSearchResult, long startTime) throws IndexException, CacheException {
final Log log = LogFactory.getLog(AFSearch.class);
String[] indexNames = indexName.split(";");
indexNames = normalizeIndexNames(indexNames);
final HashMap<String, DHitDistribution> hitDistributions = getHitDistributionsForQuery(indexNames, dQuery);
int startHits = 0;
if (dQuery.getStartHits() != null) {
startHits = dQuery.getStartHits().intValue();
}
final QueryScope[] queryScopes =
FederatedQueryHandling.calculateQueries(indexNames, startHits, dQuery.getMaxHits(), hitDistributions);
final CacheManager<String, DResult> cache = CacheManagerFactory.getInstance();
final DResult dMergedResult = new DResult();
dMergedResult.setName(indexName);
for (final QueryScope queryScope : queryScopes) {
boolean killPoolConnection = false;
IndexConnection indexConnection = null;
try {
final long start2 = System.currentTimeMillis();
dQuery.setIndexName(queryScope.getIndexName());
dQuery.setStartHits(queryScope.getStart());
dQuery.setMaxHits(queryScope.getHits());
final String sQuery = dQuery.toString();
final String queryHash = getMD5Hash(sQuery);
final String queryName = byteArrayToString(sQuery.getBytes());
final String qualifiedNameInCache = "/" + dQuery.getIndexName() + "/" + queryHash;
DResult dResult = null;
if (cache.exists(qualifiedNameInCache, queryName)) {
dResult = cache.get(qualifiedNameInCache, queryName);
} else {
indexConnection = IndexManager.getInstance(queryScope.getIndexName());
if (log.isInfoEnabled()) {
log.info("Time to get instance [" + queryScope.getIndexName() + "]: "
+ (System.currentTimeMillis() - start2) + "[ms]");
}
// TODO:
dResult = null; //indexConnection.doQuery(dQuery);
if ((dResult != null) && (dResult.getErrorMessage() == null)) {
cache.put(qualifiedNameInCache, queryName, dResult);
}
}
if (dResult != null) {
if (dResult.getErrorMessage() != null) {
dMergedResult.clearItems();
dMergedResult.setErrorMessage(dResult.getErrorMessage());
dMergedResult.setHitDistribution(null);
break;
}
addItemsToExistingResult(startHits, dMergedResult, dResult, queryScope);
}
} catch (final IndexException e) {
killPoolConnection = true;
throw e;
} finally {
if (indexConnection != null) {
IndexManager.releaseInstance(indexConnection, killPoolConnection);
if (log.isDebugEnabled()) {
log.debug("Time to Release [" + queryScope.getIndexName() + "]: "
+ (System.currentTimeMillis() - startTime) + "[ms]");
}
}
}
}
final HashMap<Integer, DHit> hits = new HashMap<Integer, DHit>();
for (final DHitDistribution hitDistribution : hitDistributions.values()) {
updateHitDistribution(hitDistribution, hits);
}
final DHitDistribution hd = new DHitDistribution();
for (final Iterator iter = hits.values().iterator(); iter.hasNext();) {
final DHit hit = (DHit) iter.next();
hd.addHit(hit);
}
if (hd.getHitsCount() != 0) {
dMergedResult.setHitDistribution(hd);
}
dMergedResult.setHits(dMergedResult.getItemsCount());
dAnyFinderSearchResult.addResult(dMergedResult);
}
/**
* This function removes dublicate entries from a set of index names. Index names are treated case sensitive.
*
* @param indexNames
* Index names to normalize.
* @return Normalized index names.
*/
public static String[] normalizeIndexNames(String[] indexNames) {
final Set<String> indexNamesAsSet = new HashSet<String>();
// TODO: check order of returned index names. ==> order could probably have a result for paging
for (final String indexName : indexNames) {
indexNamesAsSet.add(indexName.trim());
}
return indexNamesAsSet.toArray(new String[0]);
}
/**
* @param indexNames
* Array of indices to query on.
* @param query
* Base query for request.
* @return DHitDistribution per index.
* @throws IndexException
* Exception statitng that a search has failed.
* @throws CacheException
* Exception during cache access.
*/
private static HashMap<String, DHitDistribution> getHitDistributionsForQuery(String[] indexNames, DQuery query)
throws IndexException, CacheException {
final DQuery queryClone = (DQuery) query.clone();
// start at first document; these is a good chance for a cached result
queryClone.setStartHits(0);
final HashMap<String, DHitDistribution> hitDistributions = new HashMap<String, DHitDistribution>();
final CacheManager<String, DResult> cache = CacheManagerFactory.getInstance();
for (final String indexName : indexNames) {
queryClone.setIndexName(indexName);
final String sQuery = queryClone.toString();
final String queryHash = getMD5Hash(sQuery);
final String queryName = byteArrayToString(sQuery.getBytes());
boolean killPoolConnection = false;
IndexConnection indexConnection = null;
try {
DResult dResult = null;
final String qualifiedNameInCache = "/" + queryClone.getIndexName() + "/" + queryHash;
if (cache.exists(qualifiedNameInCache, queryName)) {
dResult = cache.get(qualifiedNameInCache, queryName);
dResult = cache.get(qualifiedNameInCache, queryName);
dResult = cache.get(qualifiedNameInCache, queryName);
dResult = cache.get(qualifiedNameInCache, queryName);
dResult = cache.get(qualifiedNameInCache, queryName);
} else {
indexConnection = IndexManager.getInstance(indexName);
// TODO:
dResult = null; //indexConnection.doQuery(queryClone);
cache.put(qualifiedNameInCache, queryName, dResult);
}
if ((dResult != null) && (dResult.getErrorMessage() == null)) {
hitDistributions.put(indexName, dResult.getHitDistribution());
}
} catch (final IndexException e) {
killPoolConnection = true;
throw e;
} finally {
if (indexConnection != null) {
IndexManager.releaseInstance(indexConnection, killPoolConnection);
}
}
}
return hitDistributions;
}
/**
* Add items to an existing search result. The basis structure for the hit distribution is also updated.
*
* @param startHits
* Start of hits in result set.
* @param targetResult
* Target search result.
* @param sourceResult
* Source search result.
* @param queryScope
* Query scope item selection.
*/
private static void addItemsToExistingResult(final int startHits, final DResult targetResult,
final DResult sourceResult, final QueryScope queryScope) {
// add items
int recordsSelected = 0;
final int resultPositionStartSelection = (queryScope.getStartSelection() % queryScope.getHits()) - 1;
for (int j = 0; j < sourceResult.getItemsCount(); j++) {
if ((j >= resultPositionStartSelection) && (recordsSelected < queryScope.getRecordsToSelect())) {
recordsSelected++;
final DItem item = (DItem) sourceResult.getItem(j).clone();
item.setPos(targetResult.getItemsCount() + startHits);
targetResult.addItem(item);
}
}
}
/**
* Update hit distribution.
*
* @param srcHD
* Source hit distribution.
* @param hits
* Structure containing the merged hit distribution.
*/
private static void updateHitDistribution(final DHitDistribution srcHD, final HashMap<Integer, DHit> hits) {
for (int j = 0; j < srcHD.getHitsCount(); j++) {
final DHit srcHit = srcHD.getHit(j);
if (hits.containsKey(new Integer(srcHit.getScore()))) {
final DHit hit = hits.get(new Integer(srcHit.getScore()));
hit.setHits(srcHit.getHits() + hit.getHits());
} else {
hits.put(new Integer(srcHit.getScore()), new DHit(srcHit.getScore(), srcHit.getHits()));
}
}
}
/**
* Takes <code>IAdvSearch</code> object and performs a search with it. The result could be multiple queries.
*
* @param dAdvancedSearch
* Search to be executed.
* @return Search result.
*/
public static DAnyFinderSearchResult search(IAdvSearch dAdvancedSearch) {
final DAnyFinderSearchResult dAnyFinderSearchResult = new DAnyFinderSearchResult();
// technically several QE are supported per AFAS but only one is currently allowed
final Iterator queries = dAdvancedSearch.getQueryExpressions();
while (queries.hasNext()) {
final IQueryExpression dQE = (IQueryExpression) queries.next();
final DResult dResult = search(dQE, dQE.getIndexName());
if (dResult != null) {
dAnyFinderSearchResult.addResult(dResult);
}
} // while
return dAnyFinderSearchResult;
}
/**
* Takes either a <code>.search.DQuery</code> or a <code>.advsearch.DQueryExpression</code> object and performs
* with that a search.
*
* @param dAnyQuery
* DQuery or IQueryExpression object containing an query to execute.
* @param indexName
* Name of index onto which the query is executed.
* @return Search result.
*/
private static DResult search(Object dAnyQuery, String indexName) {
final Log log = LogFactory.getLog(AFSearch.class);
DResult dResult = null;
IndexConnection indexConnection = null;
long start = 0;
boolean killPoolConnection = false;
// get Pool, do query, and release it
try {
start = System.currentTimeMillis();
indexConnection = IndexManager.getInstance(indexName);
if (log.isInfoEnabled()) {
log.info("Time to get instance: " + (System.currentTimeMillis() - start) + "[ms]");
}
// perform query
if (dAnyQuery instanceof DQuery) {
// TODO:
dResult = null; //indexConnection.doQuery((DQuery) dAnyQuery);
} else if (dAnyQuery instanceof IQueryExpression) {
// TODO:
dResult = null; //indexConnection.doQuery((IQueryExpression) dAnyQuery);
} else {
dResult = new DResult();
dResult.setName(indexName);
dResult.setHits(0);
final DErrorMessage dErrorMessage = new DErrorMessage();
dErrorMessage.setCode("AF-ERR");
dErrorMessage.setMessage("AnyFinder Error");
dErrorMessage.setDetail("Object dAnyQuery is neither of Class DQuery nor DQueryExpression");
dErrorMessage.setSource(AFSearch.class.getName());
dResult.setErrorMessage(dErrorMessage);
log.error(dErrorMessage.getDetail());
killPoolConnection = true;
}
if (log.isInfoEnabled()) {
log.info("Time to Query result: " + (System.currentTimeMillis() - start) + "[ms]");
}
} catch (final IndexException e) {
dResult = createErrorMessage2(indexName, e);
killPoolConnection = true;
} catch (final Exception e) {
dResult = createErrorMessage2(indexName, e);
killPoolConnection = true;
} finally {
if (indexConnection != null) {
IndexManager.releaseInstance(indexConnection, killPoolConnection);
if (log.isDebugEnabled()) {
log.debug("Time to Release: " + (System.currentTimeMillis() - start) + "[ms]");
}
}
} // try
return dResult;
}
/**
* Prepare and log error message for result.
*
* @param dAnyFinderSearchResult
* Search result where the error is appended.
* @param indexName
* Index name.
* @param exception
* Exception occured in code.
*/
private static void createErrorMessage(final DAnyFinderSearchResult dAnyFinderSearchResult, String indexName,
Exception exception) {
final Log log = LogFactory.getLog(AFSearch.class);
final DResult dResult = prepareErrorMessageFromException(indexName, exception);
dAnyFinderSearchResult.addResult(dResult);
log.error("", exception);
}
/**
* Prepare DResult containing error description.
*
* @param indexName
* Index name.
* @param exception
* Exception occured in code.
* @return DResult containing error description.
*/
private static DResult prepareErrorMessageFromException(String indexName, Exception exception) {
final DResult dResult = new DResult();
dResult.setName(indexName);
dResult.setHits(0);
final DErrorMessage dErrorMessage = new DErrorMessage();
dErrorMessage.setCode("AF-ERR");
dErrorMessage.setMessage("AnyFinder Error");
if (exception.getMessage() != null) {
dErrorMessage.setDetail(exception.getMessage());
} else {
dErrorMessage.setDetail(exception.getClass().getName());
}
dErrorMessage.setSource(AFSearch.class.getName());
dResult.setErrorMessage(dErrorMessage);
return dResult;
}
/**
* Create error message result structure.
*
* @param indexName
* Index name where the error occured in request.
* @param exception
* Exception in code.
* @return DResult containing an error message.
*/
private static DResult createErrorMessage2(String indexName, Exception exception) {
final Log log = LogFactory.getLog(AFSearch.class);
final DResult dResult = prepareErrorMessageFromException(indexName, exception);
log.error("", exception);
return dResult;
}
/**
* Converts a <code>DAnyFinderSearchResult</code> to a byte[].
*
* @param dAnyFinderSearchResult
* search result to transform.
* @return Streamed search result.
* @throws AFException
* Streaming error.
*/
public static final byte[] toStream(DAnyFinderSearchResult dAnyFinderSearchResult) throws AFException {
try {
final Document doc = DAnyFinderSearchResultCodec.encode(dAnyFinderSearchResult);
final byte[] bytes = XMLUtils.stream(doc.getDocumentElement(), true, true);
return bytes;
} catch (final DSearchResultException e) {
throw new AFException("unable encode AnyFinderSearchResult", e);
} catch (final XMLUtilsException e) {
throw new AFException("unable to stream AnyFinderSearchResult", e);
}
}
}