| package org.apache.lucene.search.grouping; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.search.Collector; |
| import org.apache.lucene.util.FixedBitSet; |
| |
| import java.io.IOException; |
| import java.util.Collection; |
| |
| /** |
| * This collector specializes in collecting the most relevant document (group head) for each group that match the query. |
| * |
| * @lucene.experimental |
| */ |
| @SuppressWarnings({"unchecked","rawtypes"}) |
| public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroupHeadsCollector.GroupHead> extends Collector { |
| |
| protected final int[] reversed; |
| protected final int compIDXEnd; |
| protected final TemporalResult temporalResult; |
| |
| protected AbstractAllGroupHeadsCollector(int numberOfSorts) { |
| this.reversed = new int[numberOfSorts]; |
| this.compIDXEnd = numberOfSorts - 1; |
| temporalResult = new TemporalResult(); |
| } |
| |
| /** |
| * @param maxDoc The maxDoc of the top level {@link IndexReader}. |
| * @return a {@link FixedBitSet} containing all group heads. |
| */ |
| public FixedBitSet retrieveGroupHeads(int maxDoc) { |
| FixedBitSet bitSet = new FixedBitSet(maxDoc); |
| |
| Collection<GH> groupHeads = getCollectedGroupHeads(); |
| for (GroupHead groupHead : groupHeads) { |
| bitSet.set(groupHead.doc); |
| } |
| |
| return bitSet; |
| } |
| |
| /** |
| * @return an int array containing all group heads. The size of the array is equal to number of collected unique groups. |
| */ |
| public int[] retrieveGroupHeads() { |
| Collection<GH> groupHeads = getCollectedGroupHeads(); |
| int[] docHeads = new int[groupHeads.size()]; |
| |
| int i = 0; |
| for (GroupHead groupHead : groupHeads) { |
| docHeads[i++] = groupHead.doc; |
| } |
| |
| return docHeads; |
| } |
| |
| /** |
| * @return the number of group heads found for a query. |
| */ |
| public int groupHeadsSize() { |
| return getCollectedGroupHeads().size(); |
| } |
| |
| /** |
| * Returns the group head and puts it into {@link #temporalResult}. |
| * If the group head wasn't encountered before then it will be added to the collected group heads. |
| * <p/> |
| * The {@link TemporalResult#stop} property will be <code>true</code> if the group head wasn't encountered before |
| * otherwise <code>false</code>. |
| * |
| * @param doc The document to retrieve the group head for. |
| * @throws IOException If I/O related errors occur |
| */ |
| protected abstract void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException; |
| |
| /** |
| * Returns the collected group heads. |
| * Subsequent calls should return the same group heads. |
| * |
| * @return the collected group heads |
| */ |
| protected abstract Collection<GH> getCollectedGroupHeads(); |
| |
| @Override |
| public void collect(int doc) throws IOException { |
| retrieveGroupHeadAndAddIfNotExist(doc); |
| if (temporalResult.stop) { |
| return; |
| } |
| GH groupHead = temporalResult.groupHead; |
| |
| // Ok now we need to check if the current doc is more relevant then current doc for this group |
| for (int compIDX = 0; ; compIDX++) { |
| final int c = reversed[compIDX] * groupHead.compare(compIDX, doc); |
| if (c < 0) { |
| // Definitely not competitive. So don't even bother to continue |
| return; |
| } else if (c > 0) { |
| // Definitely competitive. |
| break; |
| } else if (compIDX == compIDXEnd) { |
| // Here c=0. If we're at the last comparator, this doc is not |
| // competitive, since docs are visited in doc Id order, which means |
| // this doc cannot compete with any other document in the queue. |
| return; |
| } |
| } |
| groupHead.updateDocHead(doc); |
| } |
| |
| @Override |
| public boolean acceptsDocsOutOfOrder() { |
| return false; |
| } |
| |
| /** |
| * Contains the result of group head retrieval. |
| * To prevent new object creations of this class for every collect. |
| */ |
| protected class TemporalResult { |
| |
| public GH groupHead; |
| public boolean stop; |
| |
| } |
| |
| /** |
| * Represents a group head. A group head is the most relevant document for a particular group. |
| * The relevancy is based is usually based on the sort. |
| * |
| * The group head contains a group value with its associated most relevant document id. |
| */ |
| public static abstract class GroupHead<GROUP_VALUE_TYPE> { |
| |
| public final GROUP_VALUE_TYPE groupValue; |
| public int doc; |
| |
| protected GroupHead(GROUP_VALUE_TYPE groupValue, int doc) { |
| this.groupValue = groupValue; |
| this.doc = doc; |
| } |
| |
| /** |
| * Compares the specified document for a specified comparator against the current most relevant document. |
| * |
| * @param compIDX The comparator index of the specified comparator. |
| * @param doc The specified document. |
| * @return -1 if the specified document wasn't competitive against the current most relevant document, 1 if the |
| * specified document was competitive against the current most relevant document. Otherwise 0. |
| * @throws IOException If I/O related errors occur |
| */ |
| protected abstract int compare(int compIDX, int doc) throws IOException; |
| |
| /** |
| * Updates the current most relevant document with the specified document. |
| * |
| * @param doc The specified document |
| * @throws IOException If I/O related errors occur |
| */ |
| protected abstract void updateDocHead(int doc) throws IOException; |
| |
| } |
| |
| } |