lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.*;
  22
  23 import org.apache.lucene.index.IndexReader;
  24 import org.apache.lucene.index.MultipleTermPositions;
  25 import org.apache.lucene.index.Term;
  26 import org.apache.lucene.index.TermPositions;
  27 import org.apache.lucene.search.Explanation.IDFExplanation;
  28 import org.apache.lucene.util.ArrayUtil;
  29 import org.apache.lucene.util.ToStringUtils;
  30
/**
 * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
 * method {@link #add(Term[])}.
 * For example, to search for the phrase "Microsoft app*", first call
 * add(Term) with the term "Microsoft", then find all terms that have "app"
 * as a prefix using IndexReader.terms(Term), and finally call
 * MultiPhraseQuery.add(Term[] terms) to add them to the query.
 *
 * @version 1.0
 */
  41 public class MultiPhraseQuery extends Query {
  42   private String field;
  43   private ArrayList<Term[]> termArrays = new ArrayList<Term[]>();
  44   private ArrayList<Integer> positions = new ArrayList<Integer>();
  45
  46   private int slop = 0;
  47
  48   /** Sets the phrase slop for this query.
  49    * @see PhraseQuery#setSlop(int)
  50    */
  51   public void setSlop(int s) { slop = s; }
  52
  53   /** Sets the phrase slop for this query.
  54    * @see PhraseQuery#getSlop()
  55    */
  56   public int getSlop() { return slop; }
  57
  58   /** Add a single term at the next position in the phrase.
  59    * @see PhraseQuery#add(Term)
  60    */
  61   public void add(Term term) { add(new Term[]{term}); }
  62
  63   /** Add multiple terms at the next position in the phrase.  Any of the terms
  64    * may match.
  65    *
  66    * @see PhraseQuery#add(Term)
  67    */
  68   public void add(Term[] terms) {
  69     int position = 0;
  70     if (positions.size() > 0)
  71       position = positions.get(positions.size()-1).intValue() + 1;
  72
  73     add(terms, position);
  74   }
  75
  76   /**
  77    * Allows to specify the relative position of terms within the phrase.
  78    *
  79    * @see PhraseQuery#add(Term, int)
  80    * @param terms
  81    * @param position
  82    */
  83   public void add(Term[] terms, int position) {
  84     if (termArrays.size() == 0)
  85       field = terms[0].field();
  86
  87     for (int i = 0; i < terms.length; i++) {
  88       if (terms[i].field() != field) {
  89         throw new IllegalArgumentException(
  90             "All phrase terms must be in the same field (" + field + "): "
  91                 + terms[i]);
  92       }
  93     }
  94
  95     termArrays.add(terms);
  96     positions.add(Integer.valueOf(position));
  97   }
  98
  99   /**
 100    * Returns a List of the terms in the multiphrase.
 101    * Do not modify the List or its contents.
 102    */
 103   public List<Term[]> getTermArrays() {
 104           return Collections.unmodifiableList(termArrays);
 105   }
 106
 107   /**
 108    * Returns the relative positions of terms in this phrase.
 109    */
 110   public int[] getPositions() {
 111     int[] result = new int[positions.size()];
 112     for (int i = 0; i < positions.size(); i++)
 113       result[i] = positions.get(i).intValue();
 114     return result;
 115   }
 116
 117   // inherit javadoc
 118   @Override
 119   public void extractTerms(Set<Term> terms) {
 120     for (final Term[] arr : termArrays) {
 121       for (final Term term: arr) {
 122         terms.add(term);
 123       }
 124     }
 125   }
 126
 127
 128   private class MultiPhraseWeight extends Weight {
 129     private Similarity similarity;
 130     private float value;
 131     private final IDFExplanation idfExp;
 132     private float idf;
 133     private float queryNorm;
 134     private float queryWeight;
 135
 136     public MultiPhraseWeight(Searcher searcher)
 137       throws IOException {
 138       this.similarity = getSimilarity(searcher);
 139
 140       // compute idf
 141       ArrayList<Term> allTerms = new ArrayList<Term>();
 142       for(final Term[] terms: termArrays) {
 143         for (Term term: terms) {
 144           allTerms.add(term);
 145         }
 146       }
 147       idfExp = similarity.idfExplain(allTerms, searcher);
 148       idf = idfExp.getIdf();
 149     }
 150
 151     @Override
 152     public Query getQuery() { return MultiPhraseQuery.this; }
 153
 154     @Override
 155     public float getValue() { return value; }
 156
 157     @Override
 158     public float sumOfSquaredWeights() {
 159       queryWeight = idf * getBoost();             // compute query weight
 160       return queryWeight * queryWeight;           // square it
 161     }
 162
 163     @Override
 164     public void normalize(float queryNorm) {
 165       this.queryNorm = queryNorm;
 166       queryWeight *= queryNorm;                   // normalize query weight
 167       value = queryWeight * idf;                  // idf for document
 168     }
 169
 170     @Override
 171     public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
 172       if (termArrays.size() == 0)                  // optimize zero-term case
 173         return null;
 174
 175       PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
 176
 177       for (int pos=0; pos<postingsFreqs.length; pos++) {
 178         Term[] terms = termArrays.get(pos);
 179
 180         final TermPositions p;
 181         int docFreq;
 182
 183         if (terms.length > 1) {
 184           p = new MultipleTermPositions(reader, terms);
 185
 186           // coarse -- this overcounts since a given doc can
 187           // have more than one terms:
 188           docFreq = 0;
 189           for(int termIdx=0;termIdx<terms.length;termIdx++) {
 190             docFreq += reader.docFreq(terms[termIdx]);
 191           }
 192         } else {
 193           p = reader.termPositions(terms[0]);
 194           docFreq = reader.docFreq(terms[0]);
 195
 196           if (p == null)
 197             return null;
 198         }
 199
 200         postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue(), terms[0]);
 201       }
 202
 203       // sort by increasing docFreq order
 204       if (slop == 0) {
 205         ArrayUtil.mergeSort(postingsFreqs);
 206       }
 207
 208       if (slop == 0) {
 209         ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity,
 210                                                     reader.norms(field));
 211         if (s.noDocs) {
 212           return null;
 213         } else {
 214           return s;
 215         }
 216       } else {
 217         return new SloppyPhraseScorer(this, postingsFreqs, similarity,
 218                                       slop, reader.norms(field));
 219       }
 220     }
 221
 222     @Override
 223     public Explanation explain(IndexReader reader, int doc)
 224       throws IOException {
 225       ComplexExplanation result = new ComplexExplanation();
 226       result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
 227
 228       Explanation idfExpl = new Explanation(idf, "idf(" + field + ":" + idfExp.explain() +")");
 229
 230       // explain query weight
 231       Explanation queryExpl = new Explanation();
 232       queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
 233
 234       Explanation boostExpl = new Explanation(getBoost(), "boost");
 235       if (getBoost() != 1.0f)
 236         queryExpl.addDetail(boostExpl);
 237
 238       queryExpl.addDetail(idfExpl);
 239
 240       Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
 241       queryExpl.addDetail(queryNormExpl);
 242
 243       queryExpl.setValue(boostExpl.getValue() *
 244                          idfExpl.getValue() *
 245                          queryNormExpl.getValue());
 246
 247       result.addDetail(queryExpl);
 248
 249       // explain field weight
 250       ComplexExplanation fieldExpl = new ComplexExplanation();
 251       fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
 252                                "), product of:");
 253
 254       Scorer scorer = scorer(reader, true, false);
 255       if (scorer == null) {
 256         return new Explanation(0.0f, "no matching docs");
 257       }
 258
 259       Explanation tfExplanation = new Explanation();
 260       int d = scorer.advance(doc);
 261       float phraseFreq;
 262       if (d == doc) {
 263         phraseFreq = scorer.freq();
 264       } else {
 265         phraseFreq = 0.0f;
 266       }
 267
 268       tfExplanation.setValue(similarity.tf(phraseFreq));
 269       tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
 270       fieldExpl.addDetail(tfExplanation);
 271       fieldExpl.addDetail(idfExpl);
 272
 273       Explanation fieldNormExpl = new Explanation();
 274       byte[] fieldNorms = reader.norms(field);
 275       float fieldNorm =
 276         fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
 277       fieldNormExpl.setValue(fieldNorm);
 278       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
 279       fieldExpl.addDetail(fieldNormExpl);
 280
 281       fieldExpl.setMatch(Boolean.valueOf(tfExplanation.isMatch()));
 282       fieldExpl.setValue(tfExplanation.getValue() *
 283                          idfExpl.getValue() *
 284                          fieldNormExpl.getValue());
 285
 286       result.addDetail(fieldExpl);
 287       result.setMatch(fieldExpl.getMatch());
 288
 289       // combine them
 290       result.setValue(queryExpl.getValue() * fieldExpl.getValue());
 291
 292       if (queryExpl.getValue() == 1.0f)
 293         return fieldExpl;
 294
 295       return result;
 296     }
 297   }
 298
 299   @Override
 300   public Query rewrite(IndexReader reader) {
 301     if (termArrays.size() == 1) {                 // optimize one-term case
 302       Term[] terms = termArrays.get(0);
 303       BooleanQuery boq = new BooleanQuery(true);
 304       for (int i=0; i<terms.length; i++) {
 305         boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
 306       }
 307       boq.setBoost(getBoost());
 308       return boq;
 309     } else {
 310       return this;
 311     }
 312   }
 313
 314   @Override
 315   public Weight createWeight(Searcher searcher) throws IOException {
 316     return new MultiPhraseWeight(searcher);
 317   }
 318
 319   /** Prints a user-readable version of this query. */
 320   @Override
 321   public final String toString(String f) {
 322     StringBuilder buffer = new StringBuilder();
 323     if (field == null || !field.equals(f)) {
 324       buffer.append(field);
 325       buffer.append(":");
 326     }
 327
 328     buffer.append("\"");
 329     Iterator<Term[]> i = termArrays.iterator();
 330     while (i.hasNext()) {
 331       Term[] terms = i.next();
 332       if (terms.length > 1) {
 333         buffer.append("(");
 334         for (int j = 0; j < terms.length; j++) {
 335           buffer.append(terms[j].text());
 336           if (j < terms.length-1)
 337             buffer.append(" ");
 338         }
 339         buffer.append(")");
 340       } else {
 341         buffer.append(terms[0].text());
 342       }
 343       if (i.hasNext())
 344         buffer.append(" ");
 345     }
 346     buffer.append("\"");
 347
 348     if (slop != 0) {
 349       buffer.append("~");
 350       buffer.append(slop);
 351     }
 352
 353     buffer.append(ToStringUtils.boost(getBoost()));
 354
 355     return buffer.toString();
 356   }
 357
 358
 359   /** Returns true if <code>o</code> is equal to this. */
 360   @Override
 361   public boolean equals(Object o) {
 362     if (!(o instanceof MultiPhraseQuery)) return false;
 363     MultiPhraseQuery other = (MultiPhraseQuery)o;
 364     return this.getBoost() == other.getBoost()
 365       && this.slop == other.slop
 366       && termArraysEquals(this.termArrays, other.termArrays)
 367       && this.positions.equals(other.positions);
 368   }
 369
 370   /** Returns a hash code value for this object.*/
 371   @Override
 372   public int hashCode() {
 373     return Float.floatToIntBits(getBoost())
 374       ^ slop
 375       ^ termArraysHashCode()
 376       ^ positions.hashCode()
 377       ^ 0x4AC65113;
 378   }
 379
 380   // Breakout calculation of the termArrays hashcode
 381   private int termArraysHashCode() {
 382     int hashCode = 1;
 383     for (final Term[] termArray: termArrays) {
 384       hashCode = 31 * hashCode
 385           + (termArray == null ? 0 : Arrays.hashCode(termArray));
 386     }
 387     return hashCode;
 388   }
 389
 390   // Breakout calculation of the termArrays equals
 391   private boolean termArraysEquals(List<Term[]> termArrays1, List<Term[]> termArrays2) {
 392     if (termArrays1.size() != termArrays2.size()) {
 393       return false;
 394     }
 395     ListIterator<Term[]> iterator1 = termArrays1.listIterator();
 396     ListIterator<Term[]> iterator2 = termArrays2.listIterator();
 397     while (iterator1.hasNext()) {
 398       Term[] termArray1 = iterator1.next();
 399       Term[] termArray2 = iterator2.next();
 400       if (!(termArray1 == null ? termArray2 == null : Arrays.equals(termArray1,
 401           termArray2))) {
 402         return false;
 403       }
 404     }
 405     return true;
 406   }
 407 }