blob: dc0d0f38a6ec446e02cc71e5b2eaefaaf386fcc0 [file] [log] [blame]
/***********************************************************************************************************************
* Copyright (c) 2008 empolis GmbH and brox IT Solutions GmbH. All rights reserved. This program and the accompanying
* materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this distribution,
* and is available at http://www.eclipse.org/legal/epl-v10.html
*
* Contributors: August Georg Schmidt (brox IT Solutions GmbH) - initial API and implementation
**********************************************************************************************************************/
package org.eclipse.smila.search;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.collections.comparators.ReverseComparator;
import org.eclipse.smila.search.search.tools.searchresult.DHit;
import org.eclipse.smila.search.search.tools.searchresult.DHitDistribution;
/**
 * Utility class for federated query handling. It translates a logical result window ({@code startHits},
 * {@code maxHits}) over the merged, score-ordered hits of several indices into the per-index queries that have to be
 * executed to fill that window.
 *
 * @author August Georg Schmidt (BROX)
 */
public final class FederatedQueryHandling {

  /**
   * Hide constructor. Class is used in a static way.
   */
  private FederatedQueryHandling() {
  }

  /**
   * This method creates a list of queries to be executed during search process.
   * <p>
   * The hit distributions of all requested indices are merged into one ranking (highest score first; hits with the same
   * score are ordered by the position of their index in {@code indexNames}). For every index that contributes at least
   * one hit to the window {@code (startHits, startHits + maxHits]} of that ranking, a {@link QueryScope} is created
   * whose start is aligned to a multiple of {@code maxHits}. If the records to select from an index do not fit into
   * that single page, a second scope for the directly following page is appended right after it.
   * <p>
   * If {@code maxHits} is zero or negative no scope is created and an empty array is returned.
   *
   * @param indexNames
   *          Index names from search query. Must not be {@code null}.
   * @param startHits
   *          Start of hits in logical result structure (number of leading hits to skip).
   * @param maxHits
   *          Maximum number of hits to be returned.
   * @param hitDistributions
   *          Hit Distributions of all indices, keyed by index name. Must not be {@code null}; indices without an entry
   *          (or with a {@code null} entry) are ignored.
   * @return Array of QueryScope objects to be processed, in the order of {@code indexNames}.
   */
  public static QueryScope[] calculateQueries(String[] indexNames, int startHits, int maxHits,
    HashMap<String, DHitDistribution> hitDistributions) {
    // prepare data structure containing all hits grouped/sorted by score and index name.
    final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel =
      calculateIndicesPerHitLevel(indexNames, hitDistributions);
    final HashMap<String, QueryScope> queryScopes = calculateQueryScopes(startHits, maxHits, indicesPerHitLevel);
    final HashMap<String, Integer> resolvedRecords =
      calculateStartPositionForQueryScopes(startHits, maxHits, indicesPerHitLevel);

    // assign selection start
    for (QueryScope queryScope : queryScopes.values()) {
      final Integer startSelection = resolvedRecords.get(queryScope.getIndexName());
      if (startSelection != null) {
        queryScope.setStartSelection(startSelection);
      }
    }

    // prepare query scope return in correct order and do query scope extension
    final List<QueryScope> queryScopeOrderedResult = new ArrayList<QueryScope>();
    for (String indexName : indexNames) {
      final QueryScope queryScope = queryScopes.get(indexName);
      if (queryScope == null) {
        // this index contributes nothing to the requested window
        continue;
      }
      queryScopeOrderedResult.add(queryScope);

      // records of the page that lie in front of the first selected record (start selection is 1-based)
      final int alreadySpentRecords = queryScope.getStartSelection() - queryScope.getStart() - 1;
      final int spentAndSelectedRecords = alreadySpentRecords + queryScope.getRecordsToSelect();
      // NOTE(review): getHits() is presumably the page size handed to the QueryScope constructor - confirm.
      if (spentAndSelectedRecords > queryScope.getHits()) {
        // the selection runs past the end of the page: cut the first scope at the page border and
        // fetch the remaining records from the beginning of the following page.
        final int oldRecordsToSelect = queryScope.getRecordsToSelect();
        queryScope.setRecordsToSelect(queryScope.getHits() - alreadySpentRecords);
        final int remainingRecordsToSelect = oldRecordsToSelect - queryScope.getRecordsToSelect();
        final int nextStart = queryScope.getStart() + queryScope.getHits();
        queryScopeOrderedResult.add(new QueryScope(queryScope.getIndexName(), nextStart, queryScope.getHits(),
          remainingRecordsToSelect, nextStart + 1));
      }
    }
    return queryScopeOrderedResult.toArray(new QueryScope[0]);
  }

  /**
   * This method calculates the start position for query scopes: for every index that contributes to the result
   * window, the 1-based position (counted within that index) of its first hit inside the window.
   *
   * @param startHits
   *          Start of hits in search result.
   * @param maxHits
   *          Maximum number of hits in search result.
   * @param indicesPerHitLevel
   *          Structure with hit distribution grouped by score.
   * @return Start positions of a query scope by index name.
   */
  private static HashMap<String, Integer> calculateStartPositionForQueryScopes(int startHits, int maxHits,
    final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel) {
    final HashMap<String, Integer> positionPerIndex = new HashMap<String, Integer>();
    final HashMap<String, Integer> startPositionPerIndex = new HashMap<String, Integer>();
    int hitsFetched = 0;
    int position = 0;
    for (List<HitsPerIndex> hitsPerIndexList : indicesPerHitLevel.values()) {
      for (final HitsPerIndex hitsPerIndex : hitsPerIndexList) {
        final String indexName = hitsPerIndex.getIndexName();
        // continue counting where the previous (higher scored) group of this index stopped
        final Integer knownPosition = positionPerIndex.get(indexName);
        int positionInIndex = (knownPosition == null) ? 0 : knownPosition.intValue();
        for (int i = 0; i < hitsPerIndex.getHits(); i++) {
          position++;
          positionInIndex++;
          if (hitsFetched >= maxHits) {
            // window is completely filled, nothing behind it is of interest
            return startPositionPerIndex;
          }
          if (position > startHits) {
            hitsFetched++;
            // only the first hit of an index inside the window defines its start position
            if (!startPositionPerIndex.containsKey(indexName)) {
              startPositionPerIndex.put(indexName, positionInIndex);
            }
          }
        }
        // write the running position back once per group instead of once per hit
        positionPerIndex.put(indexName, positionInIndex);
      }
    }
    return startPositionPerIndex;
  }

  /**
   * This method calculates the query scopes for several results. A scope is created for an index when its first hit
   * inside the result window is encountered; every further hit of that index inside the window increments the number
   * of records to select of that scope.
   *
   * @param startHits
   *          Start of hits in search result.
   * @param maxHits
   *          Maximum number of hits in search result.
   * @param indicesPerHitLevel
   *          Structure with hit distribution grouped by score.
   * @return Query scopes by index name.
   */
  private static HashMap<String, QueryScope> calculateQueryScopes(int startHits, int maxHits,
    final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel) {
    final HashMap<String, QueryScope> queryScopes = new HashMap<String, QueryScope>();
    final HashMap<String, Integer> positionPerIndex = new HashMap<String, Integer>();
    int recordsCollected = 0;
    int currentPosition = 0;
    for (List<HitsPerIndex> hitsPerIndexList : indicesPerHitLevel.values()) {
      for (final HitsPerIndex hitsPerIndex : hitsPerIndexList) {
        final String indexName = hitsPerIndex.getIndexName();
        // continue counting where the previous (higher scored) group of this index stopped
        final Integer knownPosition = positionPerIndex.get(indexName);
        int positionInIndex = (knownPosition == null) ? 0 : knownPosition.intValue();
        for (int i = 0; i < hitsPerIndex.getHits(); i++) {
          currentPosition++;
          positionInIndex++;
          if (currentPosition <= startHits) {
            // still in front of the requested window
            continue;
          }
          if (recordsCollected >= maxHits) {
            // window is completely filled
            return queryScopes;
          }
          QueryScope queryScope = queryScopes.get(indexName);
          if (queryScope == null) {
            // Align the start to the page (multiple of maxHits) containing the first selected record.
            // Integer division is exact for every int value, whereas a float based floor loses precision
            // for positions above 2^24. maxHits is >= 1 here because recordsCollected (>= 0) < maxHits.
            final int start = ((positionInIndex - 1) / maxHits) * maxHits;
            queryScope = new QueryScope(indexName, start, maxHits);
            queryScopes.put(indexName, queryScope);
          }
          recordsCollected++;
          queryScope.setRecordsToSelect(queryScope.getRecordsToSelect() + 1);
        }
        // write the running position back once per group instead of once per hit
        positionPerIndex.put(indexName, positionInIndex);
      }
    }
    return queryScopes;
  }

  /**
   * This method creates a sorted map containing all indices and hits grouped by score. The map is ordered by
   * descending score; within one score the entries keep the order of {@code indexNames}.
   *
   * @param indexNames
   *          Name of indices.
   * @param hitDistributions
   *          Hit distributions by index name. Indices without a (non-null) distribution are skipped.
   * @return Sorted map containing all hits grouped by score and index.
   */
  private static SortedMap<Integer, List<HitsPerIndex>> calculateIndicesPerHitLevel(String[] indexNames,
    HashMap<String, DHitDistribution> hitDistributions) {
    // ReverseComparator is a raw (non generic) type; without arguments it reverses the natural ordering,
    // so the highest score comes first.
    @SuppressWarnings("unchecked")
    final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel =
      new TreeMap<Integer, List<HitsPerIndex>>(new ReverseComparator());
    for (String indexName : indexNames) {
      final DHitDistribution hitDistribution = hitDistributions.get(indexName);
      if (hitDistribution == null) {
        continue;
      }
      for (final Enumeration<?> hits = hitDistribution.getHits(); hits.hasMoreElements();) {
        final DHit hit = (DHit) hits.nextElement();
        final Integer score = hit.getScore();
        List<HitsPerIndex> indicesWithScore = indicesPerHitLevel.get(score);
        if (indicesWithScore == null) {
          indicesWithScore = new ArrayList<HitsPerIndex>();
          indicesPerHitLevel.put(score, indicesWithScore);
        }
        indicesWithScore.add(new HitsPerIndex(indexName, score, hit.getHits()));
      }
    }
    return indicesPerHitLevel;
  }
}