1 package org.apache.lucene.search.payloads;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.search.Explanation;
22 import org.apache.lucene.search.Scorer;
23 import org.apache.lucene.search.Searcher;
24 import org.apache.lucene.search.Similarity;
25 import org.apache.lucene.search.Weight;
26 import org.apache.lucene.search.spans.NearSpansOrdered;
27 import org.apache.lucene.search.spans.NearSpansUnordered;
28 import org.apache.lucene.search.spans.SpanNearQuery;
29 import org.apache.lucene.search.spans.SpanQuery;
30 import org.apache.lucene.search.spans.SpanScorer;
31 import org.apache.lucene.search.spans.SpanWeight;
32 import org.apache.lucene.search.spans.Spans;
33 import org.apache.lucene.util.ToStringUtils;
35 import java.io.IOException;
36 import java.util.Collection;
37 import java.util.Iterator;
40 * This class is very similar to
41 * {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors
42 * in the value of the payloads located at each of the positions where the
43 * {@link org.apache.lucene.search.spans.TermSpans} occurs.
45 * In order to take advantage of this, you must override
46 * {@link org.apache.lucene.search.Similarity#scorePayload}
47 * which returns 1 by default.
49 * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
51 * @see org.apache.lucene.search.Similarity#scorePayload
53 public class PayloadNearQuery extends SpanNearQuery {
54 protected String fieldName;
55 protected PayloadFunction function;
57 public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
58 this(clauses, slop, inOrder, new AveragePayloadFunction());
61 public PayloadNearQuery(SpanQuery[] clauses, int slop, boolean inOrder,
62 PayloadFunction function) {
63 super(clauses, slop, inOrder);
64 fieldName = clauses[0].getField(); // all clauses must have same field
65 this.function = function;
69 public Weight createWeight(Searcher searcher) throws IOException {
70 return new PayloadNearSpanWeight(this, searcher);
74 public Object clone() {
75 int sz = clauses.size();
76 SpanQuery[] newClauses = new SpanQuery[sz];
78 for (int i = 0; i < sz; i++) {
79 newClauses[i] = (SpanQuery) clauses.get(i).clone();
81 PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
83 boostingNearQuery.setBoost(getBoost());
84 return boostingNearQuery;
88 public String toString(String field) {
89 StringBuilder buffer = new StringBuilder();
90 buffer.append("payloadNear([");
91 Iterator<SpanQuery> i = clauses.iterator();
93 SpanQuery clause = i.next();
94 buffer.append(clause.toString(field));
102 buffer.append(inOrder);
104 buffer.append(ToStringUtils.boost(getBoost()));
105 return buffer.toString();
109 public int hashCode() {
110 final int prime = 31;
111 int result = super.hashCode();
112 result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode());
113 result = prime * result + ((function == null) ? 0 : function.hashCode());
118 public boolean equals(Object obj) {
121 if (!super.equals(obj))
123 if (getClass() != obj.getClass())
125 PayloadNearQuery other = (PayloadNearQuery) obj;
126 if (fieldName == null) {
127 if (other.fieldName != null)
129 } else if (!fieldName.equals(other.fieldName))
131 if (function == null) {
132 if (other.function != null)
134 } else if (!function.equals(other.function))
139 public class PayloadNearSpanWeight extends SpanWeight {
140 public PayloadNearSpanWeight(SpanQuery query, Searcher searcher)
142 super(query, searcher);
146 public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder,
147 boolean topScorer) throws IOException {
148 return new PayloadNearSpanScorer(query.getSpans(reader), this,
149 similarity, reader.norms(query.getField()));
153 public class PayloadNearSpanScorer extends SpanScorer {
155 protected float payloadScore;
156 private int payloadsSeen;
157 Similarity similarity = getSimilarity();
159 protected PayloadNearSpanScorer(Spans spans, Weight weight,
160 Similarity similarity, byte[] norms) throws IOException {
161 super(spans, weight, similarity, norms);
165 // Get the payloads associated with all underlying subspans
166 public void getPayloads(Spans[] subSpans) throws IOException {
167 for (int i = 0; i < subSpans.length; i++) {
168 if (subSpans[i] instanceof NearSpansOrdered) {
169 if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
170 processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
171 subSpans[i].start(), subSpans[i].end());
173 getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
174 } else if (subSpans[i] instanceof NearSpansUnordered) {
175 if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
176 processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
177 subSpans[i].start(), subSpans[i].end());
179 getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
185 * By default, uses the {@link PayloadFunction} to score the payloads, but
186 * can be overridden to do other things.
188 * @param payLoads The payloads
189 * @param start The start position of the span being scored
190 * @param end The end position of the span being scored
194 protected void processPayloads(Collection<byte[]> payLoads, int start, int end) {
195 for (final byte[] thePayload : payLoads) {
196 payloadScore = function.currentScore(doc, fieldName, start, end,
197 payloadsSeen, payloadScore, similarity.scorePayload(doc, fieldName,
198 spans.start(), spans.end(), thePayload, 0, thePayload.length));
205 protected boolean setFreqCurrentDoc() throws IOException {
214 int matchLength = spans.end() - spans.start();
215 freq += getSimilarity().sloppyFreq(matchLength);
216 Spans[] spansArr = new Spans[1];
218 getPayloads(spansArr);
220 } while (more && (doc == spans.doc()));
225 public float score() throws IOException {
228 * function.docScore(doc, fieldName, payloadsSeen, payloadScore);
232 protected Explanation explain(int doc) throws IOException {
233 Explanation result = new Explanation();
234 // Add detail about tf/idf...
235 Explanation nonPayloadExpl = super.explain(doc);
236 result.addDetail(nonPayloadExpl);
237 // Add detail about payload
238 Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore);
239 result.addDetail(payloadExpl);
240 result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue());
241 result.setDescription("PayloadNearQuery, product of:");