core/org.eclipse.smila.solr/lib/source/org/apache/lucene/search/spans/NearSpansOrdered.java - smila/org.eclipse.smila.core - Git at Google

 package org.apache.lucene.search.spans;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.InPlaceMergeSorter;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Collection;
 import java.util.Map;
 import java.util.Set;

 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
  * where the subspans do not overlap and have a maximum slop between them.
  * <p>
  * The formed spans only contains minimum slop matches.<br>
  * The matching slop is computed from the distance(s) between
  * the non overlapping matching Spans.<br>
  * Successive matches are always formed from the successive Spans
  * of the SpanNearQuery.
  * <p>
  * The formed spans may contain overlaps when the slop is at least 1.
  * For example, when querying using
  * <pre>t1 t2 t3</pre>
  * with slop at least 1, the fragment:
  * <pre>t1 t2 t1 t3 t2 t3</pre>
  * matches twice:
  * <pre>t1 t2 .. t3      </pre>
  * <pre>      t1 .. t2 t3</pre>
  *
  *
  * Expert:
  * Only public for subclassing.  Most implementations should not need this class
  */
 public class NearSpansOrdered extends Spans {
   private final int allowedSlop;
   private boolean firstTime = true;
   private boolean more = false;

   /** The spans in the same order as the SpanNearQuery */
   private final Spans[] subSpans;

   /** Indicates that all subSpans have same doc() */
   private boolean inSameDoc = false;

   private int matchDoc = -1;
   private int matchStart = -1;
   private int matchEnd = -1;
   private List<byte[]> matchPayload;

   private final Spans[] subSpansByDoc;
   // Even though the array is probably almost sorted, InPlaceMergeSorter will likely
   // perform better since it has a lower overhead than TimSorter for small arrays
   private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
     @Override
     protected void swap(int i, int j) {
       ArrayUtil.swap(subSpansByDoc, i, j);
     }
     @Override
     protected int compare(int i, int j) {
       return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
     }
   };

   private SpanNearQuery query;
   private boolean collectPayloads = true;

   public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
     this(spanNearQuery, context, acceptDocs, termContexts, true);
   }

   public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
   throws IOException {
     if (spanNearQuery.getClauses().length < 2) {
       throw new IllegalArgumentException("Less than 2 clauses: "
                                          + spanNearQuery);
     }
     this.collectPayloads = collectPayloads;
     allowedSlop = spanNearQuery.getSlop();
     SpanQuery[] clauses = spanNearQuery.getClauses();
     subSpans = new Spans[clauses.length];
     matchPayload = new LinkedList<>();
     subSpansByDoc = new Spans[clauses.length];
     for (int i = 0; i < clauses.length; i++) {
       subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
       subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
     }
     query = spanNearQuery; // kept for toString() only.
   }

   // inherit javadocs
   @Override
   public int doc() { return matchDoc; }

   // inherit javadocs
   @Override
   public int start() { return matchStart; }

   // inherit javadocs
   @Override
   public int end() { return matchEnd; }

   public Spans[] getSubSpans() {
     return subSpans;
   }

   // TODO: Remove warning after API has been finalized
   // TODO: Would be nice to be able to lazy load payloads
   @Override
   public Collection<byte[]> getPayload() throws IOException {
     return matchPayload;
   }

   // TODO: Remove warning after API has been finalized
   @Override
   public boolean isPayloadAvailable() {
     return matchPayload.isEmpty() == false;
   }

   @Override
   public long cost() {
     long minCost = Long.MAX_VALUE;
     for (int i = 0; i < subSpans.length; i++) {
       minCost = Math.min(minCost, subSpans[i].cost());
     }
     return minCost;
   }

   // inherit javadocs
   @Override
   public boolean next() throws IOException {
     if (firstTime) {
       firstTime = false;
       for (int i = 0; i < subSpans.length; i++) {
         if (! subSpans[i].next()) {
           more = false;
           return false;
         }
       }
       more = true;
     }
     if(collectPayloads) {
       matchPayload.clear();
     }
     return advanceAfterOrdered();
   }

   // inherit javadocs
   @Override
   public boolean skipTo(int target) throws IOException {
     if (firstTime) {
       firstTime = false;
       for (int i = 0; i < subSpans.length; i++) {
         if (! subSpans[i].skipTo(target)) {
           more = false;
           return false;
         }
       }
       more = true;
     } else if (more && (subSpans[0].doc() < target)) {
       if (subSpans[0].skipTo(target)) {
         inSameDoc = false;
       } else {
         more = false;
         return false;
       }
     }
     if(collectPayloads) {
       matchPayload.clear();
     }
     return advanceAfterOrdered();
   }

   /** Advances the subSpans to just after an ordered match with a minimum slop
    * that is smaller than the slop allowed by the SpanNearQuery.
    * @return true iff there is such a match.
    */
   private boolean advanceAfterOrdered() throws IOException {
     while (more && (inSameDoc || toSameDoc())) {
       if (stretchToOrder() && shrinkToAfterShortestMatch()) {
         return true;
       }
     }
     return false; // no more matches
   }


   /** Advance the subSpans to the same document */
   private boolean toSameDoc() throws IOException {
     sorter.sort(0, subSpansByDoc.length);
     int firstIndex = 0;
     int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
     while (subSpansByDoc[firstIndex].doc() != maxDoc) {
       if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
         more = false;
         inSameDoc = false;
         return false;
       }
       maxDoc = subSpansByDoc[firstIndex].doc();
       if (++firstIndex == subSpansByDoc.length) {
         firstIndex = 0;
       }
     }
     for (int i = 0; i < subSpansByDoc.length; i++) {
       assert (subSpansByDoc[i].doc() == maxDoc)
              : " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
                                  + "\n at doc " + subSpansByDoc[i].doc()
                                  + ", but should be at " + maxDoc;
     }
     inSameDoc = true;
     return true;
   }

   /** Check whether two Spans in the same document are ordered.
    * @return true iff spans1 starts before spans2
    *              or the spans start at the same position,
    *              and spans1 ends before spans2.
    */
   static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
     assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
     int start1 = spans1.start();
     int start2 = spans2.start();
     /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
     return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
   }

   /** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
    * starts and ends as parameters.
    */
   private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) {
     return (start1 == start2) ? (end1 < end2) : (start1 < start2);
   }

   /** Order the subSpans within the same document by advancing all later spans
    * after the previous one.
    */
   private boolean stretchToOrder() throws IOException {
     matchDoc = subSpans[0].doc();
     for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
       while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
         if (! subSpans[i].next()) {
           inSameDoc = false;
           more = false;
           break;
         } else if (matchDoc != subSpans[i].doc()) {
           inSameDoc = false;
           break;
         }
       }
     }
     return inSameDoc;
   }

   /** The subSpans are ordered in the same doc, so there is a possible match.
    * Compute the slop while making the match as short as possible by advancing
    * all subSpans except the last one in reverse order.
    */
   private boolean shrinkToAfterShortestMatch() throws IOException {
     matchStart = subSpans[subSpans.length - 1].start();
     matchEnd = subSpans[subSpans.length - 1].end();
     Set<byte[]> possibleMatchPayloads = new HashSet<>();
     if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
       possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
     }

     Collection<byte[]> possiblePayload = null;

     int matchSlop = 0;
     int lastStart = matchStart;
     int lastEnd = matchEnd;
     for (int i = subSpans.length - 2; i >= 0; i--) {
       Spans prevSpans = subSpans[i];
       if (collectPayloads && prevSpans.isPayloadAvailable()) {
         Collection<byte[]> payload = prevSpans.getPayload();
         possiblePayload = new ArrayList<>(payload.size());
         possiblePayload.addAll(payload);
       }

       int prevStart = prevSpans.start();
       int prevEnd = prevSpans.end();
       while (true) { // Advance prevSpans until after (lastStart, lastEnd)
         if (! prevSpans.next()) {
           inSameDoc = false;
           more = false;
           break; // Check remaining subSpans for final match.
         } else if (matchDoc != prevSpans.doc()) {
           inSameDoc = false; // The last subSpans is not advanced here.
           break; // Check remaining subSpans for last match in this document.
         } else {
           int ppStart = prevSpans.start();
           int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
           if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
             break; // Check remaining subSpans.
           } else { // prevSpans still before (lastStart, lastEnd)
             prevStart = ppStart;
             prevEnd = ppEnd;
             if (collectPayloads && prevSpans.isPayloadAvailable()) {
               Collection<byte[]> payload = prevSpans.getPayload();
               possiblePayload = new ArrayList<>(payload.size());
               possiblePayload.addAll(payload);
             }
           }
         }
       }

       if (collectPayloads && possiblePayload != null) {
         possibleMatchPayloads.addAll(possiblePayload);
       }

       assert prevStart <= matchStart;
       if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
         matchSlop += (matchStart - prevEnd);
       }

       /* Do not break on (matchSlop > allowedSlop) here to make sure
        * that subSpans[0] is advanced after the match, if any.
        */
       matchStart = prevStart;
       lastStart = prevStart;
       lastEnd = prevEnd;
     }

     boolean match = matchSlop <= allowedSlop;

     if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
       matchPayload.addAll(possibleMatchPayloads);
     }

     return match; // ordered and allowed slop
   }

   @Override
   public String toString() {
     return getClass().getName() + "("+query.toString()+")@"+
       (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
   }
 }
	package org.apache.lucene.search.spans;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.index.TermContext;
	import org.apache.lucene.util.ArrayUtil;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.InPlaceMergeSorter;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Comparator;
	import java.util.HashSet;
	import java.util.LinkedList;
	import java.util.List;
	import java.util.Collection;
	import java.util.Map;
	import java.util.Set;

	/** A Spans that is formed from the ordered subspans of a SpanNearQuery
	* where the subspans do not overlap and have a maximum slop between them.
	* <p>
	* The formed spans only contains minimum slop matches.<br>
	* The matching slop is computed from the distance(s) between
	* the non overlapping matching Spans.<br>
	* Successive matches are always formed from the successive Spans
	* of the SpanNearQuery.
	* <p>
	* The formed spans may contain overlaps when the slop is at least 1.
	* For example, when querying using
	* <pre>t1 t2 t3</pre>
	* with slop at least 1, the fragment:
	* <pre>t1 t2 t1 t3 t2 t3</pre>
	* matches twice:
	* <pre>t1 t2 .. t3 </pre>
	* <pre> t1 .. t2 t3</pre>
	*
	*
	* Expert:
	* Only public for subclassing. Most implementations should not need this class
	*/
	public class NearSpansOrdered extends Spans {
	private final int allowedSlop;
	private boolean firstTime = true;
	private boolean more = false;

	/** The spans in the same order as the SpanNearQuery */
	private final Spans[] subSpans;

	/** Indicates that all subSpans have same doc() */
	private boolean inSameDoc = false;

	private int matchDoc = -1;
	private int matchStart = -1;
	private int matchEnd = -1;
	private List<byte[]> matchPayload;

	private final Spans[] subSpansByDoc;
	// Even though the array is probably almost sorted, InPlaceMergeSorter will likely
	// perform better since it has a lower overhead than TimSorter for small arrays
	private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
	@Override
	protected void swap(int i, int j) {
	ArrayUtil.swap(subSpansByDoc, i, j);
	}
	@Override
	protected int compare(int i, int j) {
	return subSpansByDoc[i].doc() - subSpansByDoc[j].doc();
	}
	};

	private SpanNearQuery query;
	private boolean collectPayloads = true;

	public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
	this(spanNearQuery, context, acceptDocs, termContexts, true);
	}

	public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
	throws IOException {
	if (spanNearQuery.getClauses().length < 2) {
	throw new IllegalArgumentException("Less than 2 clauses: "
	+ spanNearQuery);
	}
	this.collectPayloads = collectPayloads;
	allowedSlop = spanNearQuery.getSlop();
	SpanQuery[] clauses = spanNearQuery.getClauses();
	subSpans = new Spans[clauses.length];
	matchPayload = new LinkedList<>();
	subSpansByDoc = new Spans[clauses.length];
	for (int i = 0; i < clauses.length; i++) {
	subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
	subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
	}
	query = spanNearQuery; // kept for toString() only.
	}

	// inherit javadocs
	@Override
	public int doc() { return matchDoc; }

	// inherit javadocs
	@Override
	public int start() { return matchStart; }

	// inherit javadocs
	@Override
	public int end() { return matchEnd; }

	public Spans[] getSubSpans() {
	return subSpans;
	}

	// TODO: Remove warning after API has been finalized
	// TODO: Would be nice to be able to lazy load payloads
	@Override
	public Collection<byte[]> getPayload() throws IOException {
	return matchPayload;
	}

	// TODO: Remove warning after API has been finalized
	@Override
	public boolean isPayloadAvailable() {
	return matchPayload.isEmpty() == false;
	}

	@Override
	public long cost() {
	long minCost = Long.MAX_VALUE;
	for (int i = 0; i < subSpans.length; i++) {
	minCost = Math.min(minCost, subSpans[i].cost());
	}
	return minCost;
	}

	// inherit javadocs
	@Override
	public boolean next() throws IOException {
	if (firstTime) {
	firstTime = false;
	for (int i = 0; i < subSpans.length; i++) {
	if (! subSpans[i].next()) {
	more = false;
	return false;
	}
	}
	more = true;
	}
	if(collectPayloads) {
	matchPayload.clear();
	}
	return advanceAfterOrdered();
	}

	// inherit javadocs
	@Override
	public boolean skipTo(int target) throws IOException {
	if (firstTime) {
	firstTime = false;
	for (int i = 0; i < subSpans.length; i++) {
	if (! subSpans[i].skipTo(target)) {
	more = false;
	return false;
	}
	}
	more = true;
	} else if (more && (subSpans[0].doc() < target)) {
	if (subSpans[0].skipTo(target)) {
	inSameDoc = false;
	} else {
	more = false;
	return false;
	}
	}
	if(collectPayloads) {
	matchPayload.clear();
	}
	return advanceAfterOrdered();
	}

	/** Advances the subSpans to just after an ordered match with a minimum slop
	* that is smaller than the slop allowed by the SpanNearQuery.
	* @return true iff there is such a match.
	*/
	private boolean advanceAfterOrdered() throws IOException {
	while (more && (inSameDoc \|\| toSameDoc())) {
	if (stretchToOrder() && shrinkToAfterShortestMatch()) {
	return true;
	}
	}
	return false; // no more matches
	}


	/** Advance the subSpans to the same document */
	private boolean toSameDoc() throws IOException {
	sorter.sort(0, subSpansByDoc.length);
	int firstIndex = 0;
	int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
	while (subSpansByDoc[firstIndex].doc() != maxDoc) {
	if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
	more = false;
	inSameDoc = false;
	return false;
	}
	maxDoc = subSpansByDoc[firstIndex].doc();
	if (++firstIndex == subSpansByDoc.length) {
	firstIndex = 0;
	}
	}
	for (int i = 0; i < subSpansByDoc.length; i++) {
	assert (subSpansByDoc[i].doc() == maxDoc)
	: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
	+ "\n at doc " + subSpansByDoc[i].doc()
	+ ", but should be at " + maxDoc;
	}
	inSameDoc = true;
	return true;
	}

	/** Check whether two Spans in the same document are ordered.
	* @return true iff spans1 starts before spans2
	* or the spans start at the same position,
	* and spans1 ends before spans2.
	*/
	static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
	assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
	int start1 = spans1.start();
	int start2 = spans2.start();
	/* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
	return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
	}

	/** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
	* starts and ends as parameters.
	*/
	private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) {
	return (start1 == start2) ? (end1 < end2) : (start1 < start2);
	}

	/** Order the subSpans within the same document by advancing all later spans
	* after the previous one.
	*/
	private boolean stretchToOrder() throws IOException {
	matchDoc = subSpans[0].doc();
	for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
	while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
	if (! subSpans[i].next()) {
	inSameDoc = false;
	more = false;
	break;
	} else if (matchDoc != subSpans[i].doc()) {
	inSameDoc = false;
	break;
	}
	}
	}
	return inSameDoc;
	}

	/** The subSpans are ordered in the same doc, so there is a possible match.
	* Compute the slop while making the match as short as possible by advancing
	* all subSpans except the last one in reverse order.
	*/
	private boolean shrinkToAfterShortestMatch() throws IOException {
	matchStart = subSpans[subSpans.length - 1].start();
	matchEnd = subSpans[subSpans.length - 1].end();
	Set<byte[]> possibleMatchPayloads = new HashSet<>();
	if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
	possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
	}

	Collection<byte[]> possiblePayload = null;

	int matchSlop = 0;
	int lastStart = matchStart;
	int lastEnd = matchEnd;
	for (int i = subSpans.length - 2; i >= 0; i--) {
	Spans prevSpans = subSpans[i];
	if (collectPayloads && prevSpans.isPayloadAvailable()) {
	Collection<byte[]> payload = prevSpans.getPayload();
	possiblePayload = new ArrayList<>(payload.size());
	possiblePayload.addAll(payload);
	}

	int prevStart = prevSpans.start();
	int prevEnd = prevSpans.end();
	while (true) { // Advance prevSpans until after (lastStart, lastEnd)
	if (! prevSpans.next()) {
	inSameDoc = false;
	more = false;
	break; // Check remaining subSpans for final match.
	} else if (matchDoc != prevSpans.doc()) {
	inSameDoc = false; // The last subSpans is not advanced here.
	break; // Check remaining subSpans for last match in this document.
	} else {
	int ppStart = prevSpans.start();
	int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
	if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
	break; // Check remaining subSpans.
	} else { // prevSpans still before (lastStart, lastEnd)
	prevStart = ppStart;
	prevEnd = ppEnd;
	if (collectPayloads && prevSpans.isPayloadAvailable()) {
	Collection<byte[]> payload = prevSpans.getPayload();
	possiblePayload = new ArrayList<>(payload.size());
	possiblePayload.addAll(payload);
	}
	}
	}
	}

	if (collectPayloads && possiblePayload != null) {
	possibleMatchPayloads.addAll(possiblePayload);
	}

	assert prevStart <= matchStart;
	if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
	matchSlop += (matchStart - prevEnd);
	}

	/* Do not break on (matchSlop > allowedSlop) here to make sure
	* that subSpans[0] is advanced after the match, if any.
	*/
	matchStart = prevStart;
	lastStart = prevStart;
	lastEnd = prevEnd;
	}

	boolean match = matchSlop <= allowedSlop;

	if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
	matchPayload.addAll(possibleMatchPayloads);
	}

	return match; // ordered and allowed slop
	}

	@Override
	public String toString() {
	return getClass().getName() + "("+query.toString()+")@"+
	(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
	}
	}