package org.apache.lucene.search.payloads;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.SpanScorer;

import java.io.IOException;

/**
 * This class is very similar to
 * {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
 * in the value of the payload located at each of the positions where the
 * {@link org.apache.lucene.index.Term} occurs.
 * <p>
 * In order to take advantage of this, you must override
 * {@link org.apache.lucene.search.Similarity#scorePayload(int, String, int, int, byte[],int,int)}
 * which returns 1 by default.
 * <p>
 * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
 **/
public class PayloadTermQuery extends SpanTermQuery {
  /** Strategy that folds per-position payload scores into a per-document score. */
  protected PayloadFunction function;
  /** When {@code true}, the span score is multiplied into the payload score. */
  private boolean includeSpanScore;

  /**
   * Creates a query whose score is the span score multiplied by the payload
   * score (equivalent to passing {@code includeSpanScore = true}).
   *
   * @param term the term to match
   * @param function aggregates the payload scores of the matching positions
   */
  public PayloadTermQuery(Term term, PayloadFunction function) {
    this(term, function, true);
  }

  /**
   * Creates a query with explicit control over whether the span score takes
   * part in the final score.
   *
   * @param term the term to match
   * @param function aggregates the payload scores of the matching positions
   * @param includeSpanScore {@code true} to multiply the span score by the
   *        payload score, {@code false} to use the payload score alone
   */
  public PayloadTermQuery(Term term, PayloadFunction function,
      boolean includeSpanScore) {
    super(term);
    this.function = function;
    this.includeSpanScore = includeSpanScore;
  }

  @Override
  public Weight createWeight(Searcher searcher) throws IOException {
    return new PayloadTermWeight(this, searcher);
  }

  /** {@link SpanWeight} that hands out payload-aware scorers for this query. */
  protected class PayloadTermWeight extends SpanWeight {

    public PayloadTermWeight(PayloadTermQuery query, Searcher searcher)
        throws IOException {
      super(query, searcher);
    }

    @Override
    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder,
        boolean topScorer) throws IOException {
      final TermSpans termSpans = (TermSpans) query.getSpans(reader);
      final byte[] fieldNorms = reader.norms(query.getField());
      return new PayloadTermSpanScorer(termSpans, this, similarity, fieldNorms);
    }

    @Override
    public Explanation explain(IndexReader reader, int doc) throws IOException {
      if (includeSpanScore) {
        return super.explain(reader, doc);
      }
      // Without the span score only our own scorer's explanation is relevant.
      final PayloadTermSpanScorer payloadScorer =
          (PayloadTermSpanScorer) scorer(reader, true, false);
      return payloadScorer.explain(doc);
    }

    /**
     * {@link SpanScorer} that, while accumulating the sloppy frequency of a
     * document, also accumulates a payload score over the document's matches.
     */
    protected class PayloadTermSpanScorer extends SpanScorer {
      // TODO: is this the best way to allocate this?
      /**
       * Payload buffer; reassigned to whatever
       * {@link TermPositions#getPayload(byte[], int)} returns.
       */
      protected byte[] payload = new byte[256];
      /** Positions enumerator obtained from the spans being scored. */
      protected TermPositions positions;
      /** Payload score accumulated so far for the current document. */
      protected float payloadScore;
      /** Number of payloads folded into {@link #payloadScore} so far. */
      protected int payloadsSeen;

      public PayloadTermSpanScorer(TermSpans spans, Weight weight,
          Similarity similarity, byte[] norms) throws IOException {
        super(spans, weight, similarity, norms);
        positions = spans.getPositions();
      }

      /**
       * Resets the per-document accumulators, then walks every span of the
       * current document, summing its sloppy frequency and processing its
       * payload.
       *
       * @return {@code false} when the spans were already exhausted on entry;
       *         otherwise {@code true} if spans remain or the accumulated
       *         frequency is non-zero
       */
      @Override
      protected boolean setFreqCurrentDoc() throws IOException {
        if (!more) {
          return false;
        }
        doc = spans.doc();
        freq = 0.0f;
        payloadScore = 0;
        payloadsSeen = 0;
        final Similarity sim = getSimilarity();
        while (more && doc == spans.doc()) {
          final int matchLength = spans.end() - spans.start();
          freq += sim.sloppyFreq(matchLength);
          processPayload(sim);
          // this moves positions to the next match in this document
          more = spans.next();
        }
        if (more) {
          return true;
        }
        return freq != 0;
      }

      /**
       * Loads the payload of the current position, if one is available, and
       * folds its score into {@link #payloadScore} through
       * {@link PayloadFunction#currentScore}.
       *
       * @param similarity supplies the raw score of a single payload
       */
      protected void processPayload(Similarity similarity) throws IOException {
        if (!positions.isPayloadAvailable()) {
          // zero out the payload?
          return;
        }
        payload = positions.getPayload(payload, 0);
        final String field = term.field();
        final int start = spans.start();
        final int end = spans.end();
        payloadScore = function.currentScore(doc, field, start, end,
            payloadsSeen, payloadScore,
            similarity.scorePayload(doc, field, start, end, payload, 0,
                positions.getPayloadLength()));
        payloadsSeen++;
      }

      /**
       *
       * @return {@link #getSpanScore()} * {@link #getPayloadScore()} when the
       *         span score is included, otherwise {@link #getPayloadScore()}
       *         alone
       * @throws IOException
       */
      @Override
      public float score() throws IOException {
        if (!includeSpanScore) {
          return getPayloadScore();
        }
        return getSpanScore() * getPayloadScore();
      }

      /**
       * Returns the SpanScorer score only.
       * <p>
       * Should not be overridden without good cause!
       *
       * @return the score for just the Span part w/o the payload
       * @throws IOException
       *
       * @see #score()
       */
      protected float getSpanScore() throws IOException {
        return super.score();
      }

      /**
       * The score for the payload
       *
       * @return The score, as calculated by
       *         {@link PayloadFunction#docScore(int, String, int, float)}
       */
      protected float getPayloadScore() {
        return function.docScore(doc, term.field(), payloadsSeen, payloadScore);
      }

      /**
       * Builds an explanation made of the payload score and, when the span
       * score is included, the span explanation multiplied by it.
       */
      @Override
      protected Explanation explain(final int doc) throws IOException {
        final Explanation spanExplanation = super.explain(doc);

        // QUESTION: Is there a way to avoid this skipTo call? We need to know
        // whether to load the payload or not
        final Explanation payloadBoost = new Explanation();

        final float payloadValue = getPayloadScore();
        payloadBoost.setValue(payloadValue);
        // GSI: I suppose we could toString the payload, but I don't think that
        // would be a good idea
        payloadBoost.setDescription("scorePayload(...)");

        final ComplexExplanation result = new ComplexExplanation();
        if (includeSpanScore) {
          result.addDetail(spanExplanation);
          result.addDetail(payloadBoost);
          result.setValue(spanExplanation.getValue() * payloadValue);
          result.setDescription("btq, product of:");
        } else {
          result.addDetail(payloadBoost);
          result.setValue(payloadValue);
          result.setDescription("btq(includeSpanScore=false), result of:");
        }
        // LUCENE-1303: the match flag follows the span explanation's value.
        result.setMatch(Boolean.valueOf(spanExplanation.getValue() != 0));
        return result;
      }

    }
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int hash = super.hashCode();
    final int functionHash = (function == null) ? 0 : function.hashCode();
    hash = prime * hash + functionHash;
    // 1231 / 1237 are the values Boolean.hashCode() uses for true / false.
    final int flagHash = includeSpanScore ? 1231 : 1237;
    return prime * hash + flagHash;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!super.equals(obj) || getClass() != obj.getClass()) {
      return false;
    }
    final PayloadTermQuery that = (PayloadTermQuery) obj;
    final boolean sameFunction = (function == null)
        ? that.function == null
        : function.equals(that.function);
    return sameFunction && includeSpanScore == that.includeSpanScore;
  }

}