blob: 04ecd800ae53ab01310d6f90400b093abc142173 [file] [log] [blame]
package org.apache.lucene.search.payloads;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/**
* This class is very similar to
* {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
* in the value of the payload located at each of the positions where the
* {@link org.apache.lucene.index.Term} occurs.
* <p/>
* NOTE: In order to take advantage of this with the default scoring implementation
* ({@link DefaultSimilarity}), you must override {@link DefaultSimilarity#scorePayload(int, int, int, BytesRef)},
* which returns 1 by default.
* <p/>
* Payload scores are aggregated using a pluggable {@link PayloadFunction}.
* @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
**/
public class PayloadTermQuery extends SpanTermQuery {
protected PayloadFunction function;
private boolean includeSpanScore;
public PayloadTermQuery(Term term, PayloadFunction function) {
this(term, function, true);
}
public PayloadTermQuery(Term term, PayloadFunction function,
boolean includeSpanScore) {
super(term);
this.function = function;
this.includeSpanScore = includeSpanScore;
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new PayloadTermWeight(this, searcher);
}
protected class PayloadTermWeight extends SpanWeight {
public PayloadTermWeight(PayloadTermQuery query, IndexSearcher searcher)
throws IOException {
super(query, searcher);
}
@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.simScorer(stats, context));
}
protected class PayloadTermSpanScorer extends SpanScorer {
protected BytesRef payload;
protected float payloadScore;
protected int payloadsSeen;
private final TermSpans termSpans;
public PayloadTermSpanScorer(TermSpans spans, Weight weight, Similarity.SimScorer docScorer) throws IOException {
super(spans, weight, docScorer);
termSpans = spans;
}
@Override
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
numMatches = 0;
payloadScore = 0;
payloadsSeen = 0;
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
numMatches++;
processPayload(similarity);
more = spans.next();// this moves positions to the next match in this
// document
}
return more || (freq != 0);
}
protected void processPayload(Similarity similarity) throws IOException {
if (termSpans.isPayloadAvailable()) {
final DocsAndPositionsEnum postings = termSpans.getPostings();
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
} else {
// zero out the payload?
}
}
/**
*
* @return {@link #getSpanScore()} * {@link #getPayloadScore()}
* @throws IOException if there is a low-level I/O error
*/
@Override
public float score() throws IOException {
return includeSpanScore ? getSpanScore() * getPayloadScore()
: getPayloadScore();
}
/**
* Returns the SpanScorer score only.
* <p/>
* Should not be overridden without good cause!
*
* @return the score for just the Span part w/o the payload
* @throws IOException if there is a low-level I/O error
*
* @see #score()
*/
protected float getSpanScore() throws IOException {
return super.score();
}
/**
* The score for the payload
*
* @return The score, as calculated by
* {@link PayloadFunction#docScore(int, String, int, float)}
*/
protected float getPayloadScore() {
return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
}
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.sloppyFreq();
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
expl.addDetail(scoreExplanation);
expl.setValue(scoreExplanation.getValue());
// now the payloads part
// QUESTION: Is there a way to avoid this skipTo call? We need to know
// whether to load the payload or not
// GSI: I suppose we could toString the payload, but I don't think that
// would be a good idea
String field = ((SpanQuery)getQuery()).getField();
Explanation payloadExpl = function.explain(doc, field, scorer.payloadsSeen, scorer.payloadScore);
payloadExpl.setValue(scorer.getPayloadScore());
// combined
ComplexExplanation result = new ComplexExplanation();
if (includeSpanScore) {
result.addDetail(expl);
result.addDetail(payloadExpl);
result.setValue(expl.getValue() * payloadExpl.getValue());
result.setDescription("btq, product of:");
} else {
result.addDetail(payloadExpl);
result.setValue(payloadExpl.getValue());
result.setDescription("btq(includeSpanScore=false), result of:");
}
result.setMatch(true); // LUCENE-1303
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((function == null) ? 0 : function.hashCode());
result = prime * result + (includeSpanScore ? 1231 : 1237);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (!super.equals(obj))
return false;
if (getClass() != obj.getClass())
return false;
PayloadTermQuery other = (PayloadTermQuery) obj;
if (function == null) {
if (other.function != null)
return false;
} else if (!function.equals(other.function))
return false;
if (includeSpanScore != other.includeSpanScore)
return false;
return true;
}
}