1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.MultipleTermPositions;
25 import org.apache.lucene.index.Term;
26 import org.apache.lucene.index.TermPositions;
27 import org.apache.lucene.search.Explanation.IDFExplanation;
28 import org.apache.lucene.util.ArrayUtil;
29 import org.apache.lucene.util.ToStringUtils;
32 * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
33 * method {@link #add(Term[])}.
34 * To use this class to search for the phrase "Microsoft app*", first call
35 * add(Term) on the term "Microsoft", then find all terms that have "app" as a
36 * prefix using IndexReader.terms(Term), and call MultiPhraseQuery.add(Term[]
37 * terms) to add them to the query.
41 public class MultiPhraseQuery extends Query {
// One Term[] per call to add(Term[], int), kept in insertion order.
43 private ArrayList<Term[]> termArrays = new ArrayList<Term[]>();
// Position recorded for the entry at the same index of termArrays; add(Term[], int) appends to both lists together.
44 private ArrayList<Integer> positions = new ArrayList<Integer>();
48 /** Sets the phrase slop for this query.
49 * @see PhraseQuery#setSlop(int)
51 public void setSlop(int s) { slop = s; }
53 /** Sets the phrase slop for this query.
54 * @see PhraseQuery#getSlop()
56 public int getSlop() { return slop; }
58 /** Add a single term at the next position in the phrase.
59 * @see PhraseQuery#add(Term)
61 public void add(Term term) { add(new Term[]{term}); }
63 /** Add multiple terms at the next position in the phrase. Any of the terms
66 * @see PhraseQuery#add(Term)
68 public void add(Term[] terms) {
// When at least one position has been recorded, the new position is the most recent one plus one.
// NOTE(review): the declaration of `position` and the statement that consumes it are not
// visible in this listing -- presumably it is passed on to add(Term[], int); confirm.
70 if (positions.size() > 0)
71 position = positions.get(positions.size()-1).intValue() + 1;
77 * Allows specifying the relative position of terms within the phrase.
79 * @see PhraseQuery#add(Term, int)
83 public void add(Term[] terms, int position) {
// The first array ever added fixes the field for the whole phrase.
// NOTE(review): terms[0] is read without a length check, so an empty array fails here on the first add.
84 if (termArrays.size() == 0)
85 field = terms[0].field();
// Every term must carry that same field, otherwise the call is rejected.
// NOTE(review): `!=` compares references, not contents -- reliable only if field names are interned; confirm.
87 for (int i = 0; i < terms.length; i++) {
88 if (terms[i].field() != field) {
89 throw new IllegalArgumentException(
90 "All phrase terms must be in the same field (" + field + "): "
// Record the alternatives and their position at the same index of the two parallel lists.
95 termArrays.add(terms);
96 positions.add(Integer.valueOf(position));
100 * Returns a List of the terms in the multiphrase.
101 * Do not modify the List or its contents.
103 public List<Term[]> getTermArrays() {
104 return Collections.unmodifiableList(termArrays);
108 * Returns the relative positions of terms in this phrase.
110 public int[] getPositions() {
// Unbox the recorded positions into a freshly allocated int[] of the same length.
// NOTE(review): the return statement is not visible in this listing; presumably `result` is returned.
111 int[] result = new int[positions.size()];
112 for (int i = 0; i < positions.size(); i++)
113 result[i] = positions.get(i).intValue();
// Walks every term of every term array held by this query.
// NOTE(review): the loop body is not visible in this listing; presumably each term is added to `terms` -- confirm.
119 public void extractTerms(Set<Term> terms) {
120 for (final Term[] arr : termArrays) {
121 for (final Term term: arr) {
// Weight implementation for the enclosing MultiPhraseQuery. It is a non-static inner class,
// so it reads the outer query's termArrays, positions, field and slop directly.
128 private class MultiPhraseWeight extends Weight {
// Similarity obtained from the searcher in the constructor.
129 private Similarity similarity;
// Result of similarity.idfExplain(...) for the query's terms; its text is reused by explain().
131 private final IDFExplanation idfExp;
// Normalization factor received in normalize(float).
133 private float queryNorm;
// idf * boost as computed in sumOfSquaredWeights(), then scaled by queryNorm in normalize(float).
134 private float queryWeight;
// Captures the Similarity for the given searcher and derives the idf used by this weight.
136 public MultiPhraseWeight(Searcher searcher)
138 this.similarity = getSimilarity(searcher);
// allTerms is the list handed to idfExplain below; the nested loops visit every term of every term array.
// NOTE(review): the statement that fills allTerms is not visible in this listing.
141 ArrayList<Term> allTerms = new ArrayList<Term>();
142 for(final Term[] terms: termArrays) {
143 for (Term term: terms) {
// Keep both the explanation object (for explain()) and the plain idf value (for scoring).
147 idfExp = similarity.idfExplain(allTerms, searcher);
148 idf = idfExp.getIdf();
152 public Query getQuery() { return MultiPhraseQuery.this; }
155 public float getValue() { return value; }
158 public float sumOfSquaredWeights() {
159 queryWeight = idf * getBoost(); // compute query weight
160 return queryWeight * queryWeight; // square it
164 public void normalize(float queryNorm) {
165 this.queryNorm = queryNorm;
166 queryWeight *= queryNorm; // normalize query weight
167 value = queryWeight * idf; // idf for document
// Builds a phrase scorer over `reader`: one PostingsAndFreq per entry of termArrays, sorted,
// then wrapped in an ExactPhraseScorer or a SloppyPhraseScorer.
// NOTE(review): several lines are missing from this listing (the zero-term return, the
// declaration of docFreq, the else branch, and the condition choosing between the two scorers).
171 public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
172 if (termArrays.size() == 0) // optimize zero-term case
175 PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
177 for (int pos=0; pos<postingsFreqs.length; pos++) {
178 Term[] terms = termArrays.get(pos);
180 final TermPositions p;
// Several alternatives at this slot: read them through one MultipleTermPositions and sum their docFreqs.
183 if (terms.length > 1) {
184 p = new MultipleTermPositions(reader, terms);
186 // coarse -- this overcounts since a given doc can
187 // have more than one terms:
189 for(int termIdx=0;termIdx<terms.length;termIdx++) {
190 docFreq += reader.docFreq(terms[termIdx]);
// Single term at this slot: use its positions and docFreq directly.
193 p = reader.termPositions(terms[0]);
194 docFreq = reader.docFreq(terms[0]);
// Each entry carries the positions enumerator, the docFreq figure, the slot's recorded position and the first term.
200 postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue(), terms[0]);
203 // sort by increasing docFreq order
205 ArrayUtil.mergeSort(postingsFreqs);
// Both scorer variants receive the norms of the phrase field; only the sloppy one takes `slop`.
209 ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity,
210 reader.norms(field));
217 return new SloppyPhraseScorer(this, postingsFreqs, similarity,
218 slop, reader.norms(field));
// Builds a nested explanation for `doc`: a query-weight part (boost, idf, queryNorm) and a
// field-weight part (tf, idf, fieldNorm); the overall value is the product of the two parts.
// NOTE(review): several statements are missing from this listing (the throws clause, the
// middle factors of the products, the phraseFreq declaration, and the final branch/return).
223 public Explanation explain(IndexReader reader, int doc)
225 ComplexExplanation result = new ComplexExplanation();
226 result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
// The same idf explanation object is attached to both the query part and the field part.
228 Explanation idfExpl = new Explanation(idf, "idf(" + field + ":" + idfExp.explain() +")");
230 // explain query weight
231 Explanation queryExpl = new Explanation();
232 queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
// The boost detail is listed only when it differs from 1.0f; its value is still used in the product below.
234 Explanation boostExpl = new Explanation(getBoost(), "boost");
235 if (getBoost() != 1.0f)
236 queryExpl.addDetail(boostExpl);
238 queryExpl.addDetail(idfExpl);
240 Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
241 queryExpl.addDetail(queryNormExpl);
243 queryExpl.setValue(boostExpl.getValue() *
245 queryNormExpl.getValue());
247 result.addDetail(queryExpl);
249 // explain field weight
250 ComplexExplanation fieldExpl = new ComplexExplanation();
251 fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
// A fresh scorer is created to read the phrase frequency; a null scorer is reported as "no matching docs".
254 Scorer scorer = scorer(reader, true, false);
255 if (scorer == null) {
256 return new Explanation(0.0f, "no matching docs");
259 Explanation tfExplanation = new Explanation();
// NOTE(review): the check relating `d` to `doc` is not visible here; presumably freq() is read only when they match.
260 int d = scorer.advance(doc);
263 phraseFreq = scorer.freq();
268 tfExplanation.setValue(similarity.tf(phraseFreq));
269 tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
270 fieldExpl.addDetail(tfExplanation);
271 fieldExpl.addDetail(idfExpl);
// When the reader has no norms for the field, the field norm falls back to 1.0f.
273 Explanation fieldNormExpl = new Explanation();
274 byte[] fieldNorms = reader.norms(field);
276 fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
277 fieldNormExpl.setValue(fieldNorm);
278 fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
279 fieldExpl.addDetail(fieldNormExpl);
// The match flag of the field part follows the tf explanation and is then copied to the overall result.
281 fieldExpl.setMatch(Boolean.valueOf(tfExplanation.isMatch()));
282 fieldExpl.setValue(tfExplanation.getValue() *
284 fieldNormExpl.getValue());
286 result.addDetail(fieldExpl);
287 result.setMatch(fieldExpl.getMatch());
290 result.setValue(queryExpl.getValue() * fieldExpl.getValue());
// NOTE(review): the statements guarded by this condition are not visible in this listing.
292 if (queryExpl.getValue() == 1.0f)
// With exactly one term array, builds a BooleanQuery holding one SHOULD TermQuery per
// alternative term and copies this query's boost onto it.
// NOTE(review): the return statements (for this branch and for the general case) are not visible here.
300 public Query rewrite(IndexReader reader) {
301 if (termArrays.size() == 1) { // optimize one-term case
302 Term[] terms = termArrays.get(0);
// NOTE(review): the `true` constructor argument presumably disables coord scoring -- confirm against BooleanQuery.
303 BooleanQuery boq = new BooleanQuery(true);
304 for (int i=0; i<terms.length; i++) {
305 boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
307 boq.setBoost(getBoost());
315 public Weight createWeight(Searcher searcher) throws IOException {
316 return new MultiPhraseWeight(searcher);
319 /** Prints a user-readable version of this query. */
321 public final String toString(String f) {
322 StringBuilder buffer = new StringBuilder();
// The query's own field is written only when it is null or differs from the field `f` passed in.
// NOTE(review): when `field` is null this appends it as-is (presumably the text "null") -- confirm that is intended.
323 if (field == null || !field.equals(f)) {
324 buffer.append(field);
// Emit each phrase slot in order; a slot with several alternatives lists all of their texts.
// NOTE(review): the separators and delimiters appended between terms and slots are on lines missing from this listing.
329 Iterator<Term[]> i = termArrays.iterator();
330 while (i.hasNext()) {
331 Term[] terms = i.next();
332 if (terms.length > 1) {
334 for (int j = 0; j < terms.length; j++) {
335 buffer.append(terms[j].text());
336 if (j < terms.length-1)
// Single-term slot: just the term text.
341 buffer.append(terms[0].text());
// The boost suffix produced by ToStringUtils.boost(...) is appended last.
353 buffer.append(ToStringUtils.boost(getBoost()));
355 return buffer.toString();
359 /** Returns true if <code>o</code> is equal to this. */
361 public boolean equals(Object o) {
362 if (!(o instanceof MultiPhraseQuery)) return false;
363 MultiPhraseQuery other = (MultiPhraseQuery)o;
364 return this.getBoost() == other.getBoost()
365 && this.slop == other.slop
366 && termArraysEquals(this.termArrays, other.termArrays)
367 && this.positions.equals(other.positions);
370 /** Returns a hash code value for this object.*/
372 public int hashCode() {
// XOR-combines the bit pattern of the boost, the content hash of the term arrays and the positions hash.
// NOTE(review): further operands of this expression are on lines missing from this listing.
373 return Float.floatToIntBits(getBoost())
375 ^ termArraysHashCode()
376 ^ positions.hashCode()
380 // Breakout calculation of the termArrays hashcode
// Folds the content hash of each Term[] (Arrays.hashCode, or 0 for a null array) into a
// running value with multiplier 31, so the result depends on array contents rather than identity.
// NOTE(review): the initial value of `hashCode` and the return statement are not visible in this listing.
381 private int termArraysHashCode() {
383 for (final Term[] termArray: termArrays) {
384 hashCode = 31 * hashCode
385 + (termArray == null ? 0 : Arrays.hashCode(termArray));
390 // Breakout calculation of the termArrays equals
391 private boolean termArraysEquals(List<Term[]> termArrays1, List<Term[]> termArrays2) {
392 if (termArrays1.size() != termArrays2.size()) {
395 ListIterator<Term[]> iterator1 = termArrays1.listIterator();
396 ListIterator<Term[]> iterator2 = termArrays2.listIterator();
397 while (iterator1.hasNext()) {
398 Term[] termArray1 = iterator1.next();
399 Term[] termArray2 = iterator2.next();
400 if (!(termArray1 == null ? termArray2 == null : Arrays.equals(termArray1,