1 package org.apache.lucene.search;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing a particular sequence of terms.
 * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
 *
 * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
 */
36 public class PhraseQuery extends Query {
38 private ArrayList<Term> terms = new ArrayList<Term>(4);
39 private ArrayList<Integer> positions = new ArrayList<Integer>(4);
40 private int maxPosition = 0;
43 /** Constructs an empty phrase query. */
44 public PhraseQuery() {}
46 /** Sets the number of other words permitted between words in query phrase.
47 If zero, then this is an exact phrase search. For larger values this works
48 like a <code>WITHIN</code> or <code>NEAR</code> operator.
50 <p>The slop is in fact an edit-distance, where the units correspond to
51 moves of terms in the query phrase out of position. For example, to switch
52 the order of two words requires two moves (the first move places the words
53 atop one another), so to permit re-orderings of phrases, the slop must be
56 <p>More exact matches are scored higher than sloppier matches, thus search
57 results are sorted by exactness.
59 <p>The slop is zero by default, requiring exact matches.*/
60 public void setSlop(int s) { slop = s; }
61 /** Returns the slop. See setSlop(). */
62 public int getSlop() { return slop; }
65 * Adds a term to the end of the query phrase.
66 * The relative position of the term is the one immediately after the last term added.
68 public void add(Term term) {
70 if(positions.size() > 0)
71 position = positions.get(positions.size()-1).intValue() + 1;
77 * Adds a term to the end of the query phrase.
78 * The relative position of the term within the phrase is specified explicitly.
79 * This allows e.g. phrases with more than one term at the same position
80 * or phrases with gaps (e.g. in connection with stopwords).
85 public void add(Term term, int position) {
86 if (terms.size() == 0)
88 else if (term.field() != field)
89 throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);
92 positions.add(Integer.valueOf(position));
93 if (position > maxPosition) maxPosition = position;
96 /** Returns the set of terms in this phrase. */
97 public Term[] getTerms() {
98 return terms.toArray(new Term[0]);
102 * Returns the relative positions of terms in this phrase.
104 public int[] getPositions() {
105 int[] result = new int[positions.size()];
106 for(int i = 0; i < positions.size(); i++)
107 result[i] = positions.get(i).intValue();
112 public Query rewrite(IndexReader reader) throws IOException {
113 if (terms.size() == 1) {
114 TermQuery tq = new TermQuery(terms.get(0));
115 tq.setBoost(getBoost());
118 return super.rewrite(reader);
121 static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
122 final TermPositions postings;
127 public PostingsAndFreq(TermPositions postings, int docFreq, int position, Term term) {
128 this.postings = postings;
129 this.docFreq = docFreq;
130 this.position = position;
134 public int compareTo(PostingsAndFreq other) {
135 if (docFreq == other.docFreq) {
136 if (position == other.position) {
137 return term.compareTo(other.term);
139 return position - other.position;
141 return docFreq - other.docFreq;
145 public int hashCode() {
146 final int prime = 31;
148 result = prime * result + docFreq;
149 result = prime * result + position;
150 result = prime * result + ((term == null) ? 0 : term.hashCode());
155 public boolean equals(Object obj) {
156 if (this == obj) return true;
157 if (obj == null) return false;
158 if (getClass() != obj.getClass()) return false;
159 PostingsAndFreq other = (PostingsAndFreq) obj;
160 if (docFreq != other.docFreq) return false;
161 if (position != other.position) return false;
163 if (other.term != null) return false;
164 } else if (!term.equals(other.term)) return false;
169 private class PhraseWeight extends Weight {
170 private final Similarity similarity;
173 private float queryNorm;
174 private float queryWeight;
175 private IDFExplanation idfExp;
177 public PhraseWeight(Searcher searcher)
179 this.similarity = getSimilarity(searcher);
181 idfExp = similarity.idfExplain(terms, searcher);
182 idf = idfExp.getIdf();
186 public String toString() { return "weight(" + PhraseQuery.this + ")"; }
189 public Query getQuery() { return PhraseQuery.this; }
192 public float getValue() { return value; }
195 public float sumOfSquaredWeights() {
196 queryWeight = idf * getBoost(); // compute query weight
197 return queryWeight * queryWeight; // square it
201 public void normalize(float queryNorm) {
202 this.queryNorm = queryNorm;
203 queryWeight *= queryNorm; // normalize query weight
204 value = queryWeight * idf; // idf for document
208 public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
209 if (terms.size() == 0) // optimize zero-term case
212 PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
213 for (int i = 0; i < terms.size(); i++) {
214 final Term t = terms.get(i);
215 TermPositions p = reader.termPositions(t);
218 postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue(), t);
221 // sort by increasing docFreq order
223 ArrayUtil.mergeSort(postingsFreqs);
226 if (slop == 0) { // optimize exact case
227 ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity,
228 reader.norms(field));
236 new SloppyPhraseScorer(this, postingsFreqs, similarity, slop,
237 reader.norms(field));
242 public Explanation explain(IndexReader reader, int doc)
245 ComplexExplanation result = new ComplexExplanation();
246 result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
248 StringBuilder docFreqs = new StringBuilder();
249 StringBuilder query = new StringBuilder();
251 docFreqs.append(idfExp.explain());
252 for (int i = 0; i < terms.size(); i++) {
257 Term term = terms.get(i);
259 query.append(term.text());
263 Explanation idfExpl =
264 new Explanation(idf, "idf(" + field + ":" + docFreqs + ")");
266 // explain query weight
267 Explanation queryExpl = new Explanation();
268 queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
270 Explanation boostExpl = new Explanation(getBoost(), "boost");
271 if (getBoost() != 1.0f)
272 queryExpl.addDetail(boostExpl);
273 queryExpl.addDetail(idfExpl);
275 Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
276 queryExpl.addDetail(queryNormExpl);
278 queryExpl.setValue(boostExpl.getValue() *
280 queryNormExpl.getValue());
282 result.addDetail(queryExpl);
284 // explain field weight
285 Explanation fieldExpl = new Explanation();
286 fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
289 Scorer scorer = scorer(reader, true, false);
290 if (scorer == null) {
291 return new Explanation(0.0f, "no matching docs");
293 Explanation tfExplanation = new Explanation();
294 int d = scorer.advance(doc);
297 phraseFreq = scorer.freq();
302 tfExplanation.setValue(similarity.tf(phraseFreq));
303 tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
305 fieldExpl.addDetail(tfExplanation);
306 fieldExpl.addDetail(idfExpl);
308 Explanation fieldNormExpl = new Explanation();
309 byte[] fieldNorms = reader.norms(field);
311 fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
312 fieldNormExpl.setValue(fieldNorm);
313 fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
314 fieldExpl.addDetail(fieldNormExpl);
316 fieldExpl.setValue(tfExplanation.getValue() *
318 fieldNormExpl.getValue());
320 result.addDetail(fieldExpl);
323 result.setValue(queryExpl.getValue() * fieldExpl.getValue());
324 result.setMatch(tfExplanation.isMatch());
330 public Weight createWeight(Searcher searcher) throws IOException {
331 if (terms.size() == 1) { // optimize one-term case
332 Term term = terms.get(0);
333 Query termQuery = new TermQuery(term);
334 termQuery.setBoost(getBoost());
335 return termQuery.createWeight(searcher);
337 return new PhraseWeight(searcher);
341 * @see org.apache.lucene.search.Query#extractTerms(Set)
344 public void extractTerms(Set<Term> queryTerms) {
345 queryTerms.addAll(terms);
348 /** Prints a user-readable version of this query. */
350 public String toString(String f) {
351 StringBuilder buffer = new StringBuilder();
352 if (field != null && !field.equals(f)) {
353 buffer.append(field);
358 String[] pieces = new String[maxPosition + 1];
359 for (int i = 0; i < terms.size(); i++) {
360 int pos = positions.get(i).intValue();
361 String s = pieces[pos];
363 s = (terms.get(i)).text();
365 s = s + "|" + (terms.get(i)).text();
369 for (int i = 0; i < pieces.length; i++) {
373 String s = pieces[i];
387 buffer.append(ToStringUtils.boost(getBoost()));
389 return buffer.toString();
392 /** Returns true iff <code>o</code> is equal to this. */
394 public boolean equals(Object o) {
395 if (!(o instanceof PhraseQuery))
397 PhraseQuery other = (PhraseQuery)o;
398 return (this.getBoost() == other.getBoost())
399 && (this.slop == other.slop)
400 && this.terms.equals(other.terms)
401 && this.positions.equals(other.positions);
404 /** Returns a hash code value for this object.*/
406 public int hashCode() {
407 return Float.floatToIntBits(getBoost())
410 ^ positions.hashCode();