core/org.eclipse.smila.solr/lib/source/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java - smila/org.eclipse.smila.core - Git at Google

 package org.apache.lucene.search.grouping;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.util.FixedBitSet;

 import java.io.IOException;
 import java.util.Collection;

 /**
  * Base collector that keeps, for every group matched by the query, the single most relevant
  * document of that group (its "group head").
  *
  * @lucene.experimental
  */
 @SuppressWarnings({"unchecked","rawtypes"})
 public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroupHeadsCollector.GroupHead> extends Collector {

   /**
    * One multiplier per sort comparator; {@link #collect(int)} multiplies each comparison result
    * by the matching entry. This class only allocates the array and never writes its entries;
    * presumably subclasses fill them in.
    */
   protected final int[] reversed;

   /** Index of the last sort comparator, i.e. <code>numberOfSorts - 1</code>. */
   protected final int compIDXEnd;

   /** Reused holder that {@link #retrieveGroupHeadAndAddIfNotExist(int)} is documented to fill in. */
   protected final TemporalResult temporalResult;

   /**
    * Allocates the per-comparator state.
    *
    * @param numberOfSorts The number of sort comparators; determines the length of {@link #reversed}.
    */
   protected AbstractAllGroupHeadsCollector(int numberOfSorts) {
     this.reversed = new int[numberOfSorts];
     this.compIDXEnd = numberOfSorts - 1;
     this.temporalResult = new TemporalResult();
   }

   /**
    * Builds a bit set in which the bit of every collected group head document is set.
    *
    * @param maxDoc The maxDoc of the top level {@link IndexReader}; used as the size of the bit set.
    * @return a {@link FixedBitSet} containing all group heads.
    */
   public FixedBitSet retrieveGroupHeads(int maxDoc) {
     final FixedBitSet headBits = new FixedBitSet(maxDoc);
     for (GH head : getCollectedGroupHeads()) {
       headBits.set(head.doc);
     }
     return headBits;
   }

   /**
    * Copies the document id of every collected group head into a new array, in the iteration
    * order of the collected group heads.
    *
    * @return an int array containing all group heads. Its length equals the number of collected group heads.
    */
   public int[] retrieveGroupHeads() {
     final Collection<GH> heads = getCollectedGroupHeads();
     final int[] docIds = new int[heads.size()];

     int next = 0;
     for (GH head : heads) {
       docIds[next] = head.doc;
       next++;
     }
     return docIds;
   }

   /**
    * @return the number of group heads collected so far.
    */
   public int groupHeadsSize() {
     final Collection<GH> heads = getCollectedGroupHeads();
     return heads.size();
   }

   /**
    * Looks up the group head for the group of the given document and stores it in
    * {@link #temporalResult}. A group that was not seen before gets a new group head, which is
    * added to the collected group heads.
    * <p>
    * {@link TemporalResult#stop} must be <code>true</code> when the group head was newly created
    * by this call and <code>false</code> when it already existed.
    *
    * @param doc The document to retrieve the group head for.
    * @throws IOException If I/O related errors occur
    */
   protected abstract void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException;

   /**
    * Gives access to the group heads collected so far.
    * Repeated calls should return the same group heads.
    *
    * @return the collected group heads
    */
   protected abstract Collection<GH> getCollectedGroupHeads();

   /**
    * Offers a document as candidate head of its group. Nothing more happens when
    * {@link TemporalResult#stop} is set after the lookup. Otherwise the comparators are consulted
    * in order, and the document replaces the current head only if the first non-zero (multiplied)
    * comparison result is positive.
    *
    * @param doc The document to collect.
    * @throws IOException If I/O related errors occur
    */
   @Override
   public void collect(int doc) throws IOException {
     retrieveGroupHeadAndAddIfNotExist(doc);
     if (temporalResult.stop) {
       return;
     }
     final GH head = temporalResult.groupHead;

     // Walk the comparators until one of them decides between this doc and the current head.
     int compIDX = 0;
     while (true) {
       final int outcome = reversed[compIDX] * head.compare(compIDX, doc);
       if (outcome > 0) {
         // Definitely competitive: this doc becomes the new head of its group.
         head.updateDocHead(doc);
         return;
       }
       if (outcome < 0 || compIDX == compIDXEnd) {
         // Either definitely not competitive, or a tie on the last comparator. A tie keeps the
         // current head: docs are visited in doc id order, so the earlier doc wins.
         return;
       }
       compIDX++;
     }
   }

   /**
    * @return always <code>false</code>.
    */
   @Override
   public boolean acceptsDocsOutOfOrder() {
     return false;
   }

   /**
    * Holder for the outcome of a group head lookup. A single instance is kept in
    * {@link #temporalResult} so that no new object is created for every collect.
    */
   protected class TemporalResult {

     /** The group head that belongs to the document that was looked up. */
     public GH groupHead;

     /** When <code>true</code>, {@link #collect(int)} returns without comparing the document. */
     public boolean stop;

   }

   /**
    * Represents a group head. A group head is the most relevant document for a particular group.
    * The relevancy is usually based on the sort.
    *
    * The group head contains a group value with its associated most relevant document id.
    */
   public static abstract class GroupHead<GROUP_VALUE_TYPE> {

     /** The value identifying the group this head belongs to. */
     public final GROUP_VALUE_TYPE groupValue;

     /** The id of the document that is currently the head of the group. */
     public int doc;

     /**
      * @param groupValue The value of the group.
      * @param doc The id of the initial most relevant document.
      */
     protected GroupHead(GROUP_VALUE_TYPE groupValue, int doc) {
       this.groupValue = groupValue;
       this.doc = doc;
     }

     /**
      * Compares the specified document for a specified comparator against the current most relevant document.
      *
      * @param compIDX The comparator index of the specified comparator.
      * @param doc The specified document.
      * @return -1 if the specified document wasn't competitive against the current most relevant document, 1 if the
      *         specified document was competitive against the current most relevant document. Otherwise 0.
      * @throws IOException If I/O related errors occur
      */
     protected abstract int compare(int compIDX, int doc) throws IOException;

     /**
      * Replaces the current most relevant document with the specified document.
      *
      * @param doc The specified document
      * @throws IOException If I/O related errors occur
      */
     protected abstract void updateDocHead(int doc) throws IOException;

   }

 }
	package org.apache.lucene.search.grouping;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.search.Collector;
	import org.apache.lucene.util.FixedBitSet;

	import java.io.IOException;
	import java.util.Collection;

	/**
	 * Base collector that keeps, for every group matched by the query, the single most relevant
	 * document of that group (its "group head").
	 *
	 * @lucene.experimental
	 */
	@SuppressWarnings({"unchecked","rawtypes"})
	public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroupHeadsCollector.GroupHead> extends Collector {

	  /**
	   * One multiplier per sort comparator; {@link #collect(int)} multiplies each comparison result
	   * by the matching entry. This class only allocates the array and never writes its entries;
	   * presumably subclasses fill them in.
	   */
	  protected final int[] reversed;

	  /** Index of the last sort comparator, i.e. <code>numberOfSorts - 1</code>. */
	  protected final int compIDXEnd;

	  /** Reused holder that {@link #retrieveGroupHeadAndAddIfNotExist(int)} is documented to fill in. */
	  protected final TemporalResult temporalResult;

	  /**
	   * Allocates the per-comparator state.
	   *
	   * @param numberOfSorts The number of sort comparators; determines the length of {@link #reversed}.
	   */
	  protected AbstractAllGroupHeadsCollector(int numberOfSorts) {
	    this.reversed = new int[numberOfSorts];
	    this.compIDXEnd = numberOfSorts - 1;
	    this.temporalResult = new TemporalResult();
	  }

	  /**
	   * Builds a bit set in which the bit of every collected group head document is set.
	   *
	   * @param maxDoc The maxDoc of the top level {@link IndexReader}; used as the size of the bit set.
	   * @return a {@link FixedBitSet} containing all group heads.
	   */
	  public FixedBitSet retrieveGroupHeads(int maxDoc) {
	    final FixedBitSet headBits = new FixedBitSet(maxDoc);
	    for (GH head : getCollectedGroupHeads()) {
	      headBits.set(head.doc);
	    }
	    return headBits;
	  }

	  /**
	   * Copies the document id of every collected group head into a new array, in the iteration
	   * order of the collected group heads.
	   *
	   * @return an int array containing all group heads. Its length equals the number of collected group heads.
	   */
	  public int[] retrieveGroupHeads() {
	    final Collection<GH> heads = getCollectedGroupHeads();
	    final int[] docIds = new int[heads.size()];

	    int next = 0;
	    for (GH head : heads) {
	      docIds[next] = head.doc;
	      next++;
	    }
	    return docIds;
	  }

	  /**
	   * @return the number of group heads collected so far.
	   */
	  public int groupHeadsSize() {
	    final Collection<GH> heads = getCollectedGroupHeads();
	    return heads.size();
	  }

	  /**
	   * Looks up the group head for the group of the given document and stores it in
	   * {@link #temporalResult}. A group that was not seen before gets a new group head, which is
	   * added to the collected group heads.
	   * <p>
	   * {@link TemporalResult#stop} must be <code>true</code> when the group head was newly created
	   * by this call and <code>false</code> when it already existed.
	   *
	   * @param doc The document to retrieve the group head for.
	   * @throws IOException If I/O related errors occur
	   */
	  protected abstract void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException;

	  /**
	   * Gives access to the group heads collected so far.
	   * Repeated calls should return the same group heads.
	   *
	   * @return the collected group heads
	   */
	  protected abstract Collection<GH> getCollectedGroupHeads();

	  /**
	   * Offers a document as candidate head of its group. Nothing more happens when
	   * {@link TemporalResult#stop} is set after the lookup. Otherwise the comparators are consulted
	   * in order, and the document replaces the current head only if the first non-zero (multiplied)
	   * comparison result is positive.
	   *
	   * @param doc The document to collect.
	   * @throws IOException If I/O related errors occur
	   */
	  @Override
	  public void collect(int doc) throws IOException {
	    retrieveGroupHeadAndAddIfNotExist(doc);
	    if (temporalResult.stop) {
	      return;
	    }
	    final GH head = temporalResult.groupHead;

	    // Walk the comparators until one of them decides between this doc and the current head.
	    int compIDX = 0;
	    while (true) {
	      final int outcome = reversed[compIDX] * head.compare(compIDX, doc);
	      if (outcome > 0) {
	        // Definitely competitive: this doc becomes the new head of its group.
	        head.updateDocHead(doc);
	        return;
	      }
	      if (outcome < 0 || compIDX == compIDXEnd) {
	        // Either definitely not competitive, or a tie on the last comparator. A tie keeps the
	        // current head: docs are visited in doc id order, so the earlier doc wins.
	        return;
	      }
	      compIDX++;
	    }
	  }

	  /**
	   * @return always <code>false</code>.
	   */
	  @Override
	  public boolean acceptsDocsOutOfOrder() {
	    return false;
	  }

	  /**
	   * Holder for the outcome of a group head lookup. A single instance is kept in
	   * {@link #temporalResult} so that no new object is created for every collect.
	   */
	  protected class TemporalResult {

	    /** The group head that belongs to the document that was looked up. */
	    public GH groupHead;

	    /** When <code>true</code>, {@link #collect(int)} returns without comparing the document. */
	    public boolean stop;

	  }

	  /**
	   * Represents a group head. A group head is the most relevant document for a particular group.
	   * The relevancy is usually based on the sort.
	   *
	   * The group head contains a group value with its associated most relevant document id.
	   */
	  public static abstract class GroupHead<GROUP_VALUE_TYPE> {

	    /** The value identifying the group this head belongs to. */
	    public final GROUP_VALUE_TYPE groupValue;

	    /** The id of the document that is currently the head of the group. */
	    public int doc;

	    /**
	     * @param groupValue The value of the group.
	     * @param doc The id of the initial most relevant document.
	     */
	    protected GroupHead(GROUP_VALUE_TYPE groupValue, int doc) {
	      this.groupValue = groupValue;
	      this.doc = doc;
	    }

	    /**
	     * Compares the specified document for a specified comparator against the current most relevant document.
	     *
	     * @param compIDX The comparator index of the specified comparator.
	     * @param doc The specified document.
	     * @return -1 if the specified document wasn't competitive against the current most relevant document, 1 if the
	     *         specified document was competitive against the current most relevant document. Otherwise 0.
	     * @throws IOException If I/O related errors occur
	     */
	    protected abstract int compare(int compIDX, int doc) throws IOException;

	    /**
	     * Replaces the current most relevant document with the specified document.
	     *
	     * @param doc The specified document
	     * @throws IOException If I/O related errors occur
	     */
	    protected abstract void updateDocHead(int doc) throws IOException;

	  }

	}