lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/MultiSearcher.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.document.Document;
  21 import org.apache.lucene.document.FieldSelector;
  22 import org.apache.lucene.index.CorruptIndexException;
  23 import org.apache.lucene.index.Term;
  24 import org.apache.lucene.index.IndexReader;
  25 import org.apache.lucene.util.ReaderUtil;
  26 import org.apache.lucene.util.DummyConcurrentLock;
  27
  28 import java.io.IOException;
  29 import java.util.HashMap;
  30 import java.util.HashSet;
  31 import java.util.Map;
  32 import java.util.Set;
  33 import java.util.concurrent.Callable;
  34 import java.util.concurrent.locks.Lock;
  35
  36 /** Implements search over a set of <code>Searchables</code>.
  37  *
  38  * <p>Applications usually need only call the inherited {@link #search(Query,int)}
  39  * or {@link #search(Query,Filter,int)} methods.
  40  *
  41  * @deprecated If you are using MultiSearcher over
  42  * IndexSearchers, please use MultiReader instead; this class
  43  * does not properly handle certain kinds of queries (see <a
  44  * href="https://issues.apache.org/jira/browse/LUCENE-2756">LUCENE-2756</a>).
  45  */
  46 @Deprecated
  47 public class MultiSearcher extends Searcher {
  48
  49   /**
  50    * Document Frequency cache acting as a Dummy-Searcher. This class is no
  51    * full-fledged Searcher, but only supports the methods necessary to
  52    * initialize Weights.
  53    */
  54   private static class CachedDfSource extends Searcher {
  55     private final Map<Term,Integer> dfMap; // Map from Terms to corresponding doc freqs
  56     private final int maxDoc; // document count
  57
  58     public CachedDfSource(Map<Term,Integer> dfMap, int maxDoc, Similarity similarity) {
  59       this.dfMap = dfMap;
  60       this.maxDoc = maxDoc;
  61       setSimilarity(similarity);
  62     }
  63
  64     @Override
  65     public int docFreq(Term term) {
  66       int df;
  67       try {
  68         df = dfMap.get(term).intValue();
  69       } catch (NullPointerException e) {
  70         throw new IllegalArgumentException("df for term " + term.text()
  71             + " not available");
  72       }
  73       return df;
  74     }
  75
  76     @Override
  77     public int[] docFreqs(Term[] terms) {
  78       final int[] result = new int[terms.length];
  79       for (int i = 0; i < terms.length; i++) {
  80         result[i] = docFreq(terms[i]);
  81       }
  82       return result;
  83     }
  84
  85     @Override
  86     public int maxDoc() {
  87       return maxDoc;
  88     }
  89
  90     @Override
  91     public Query rewrite(Query query) {
  92       // this is a bit of a hack. We know that a query which
  93       // creates a Weight based on this Dummy-Searcher is
  94       // always already rewritten (see preparedWeight()).
  95       // Therefore we just return the unmodified query here
  96       return query;
  97     }
  98
  99     @Override
 100     public void close() {
 101       throw new UnsupportedOperationException();
 102     }
 103
 104     @Override
 105     public Document doc(int i) {
 106       throw new UnsupportedOperationException();
 107     }
 108
 109     @Override
 110     public Document doc(int i, FieldSelector fieldSelector) {
 111       throw new UnsupportedOperationException();
 112     }
 113
 114     @Override
 115     public Explanation explain(Weight weight,int doc) {
 116       throw new UnsupportedOperationException();
 117     }
 118
 119     @Override
 120     public void search(Weight weight, Filter filter, Collector results) {
 121       throw new UnsupportedOperationException();
 122     }
 123
 124     @Override
 125     public TopDocs search(Weight weight,Filter filter,int n) {
 126       throw new UnsupportedOperationException();
 127     }
 128
 129     @Override
 130     public TopFieldDocs search(Weight weight,Filter filter,int n,Sort sort) {
 131       throw new UnsupportedOperationException();
 132     }
 133   }
 134
 135   private Searchable[] searchables;
 136   private int[] starts;
 137   private int maxDoc = 0;
 138
 139   /** Creates a searcher which searches <i>searchers</i>. */
 140   public MultiSearcher(Searchable... searchables) throws IOException {
 141     this.searchables = searchables;
 142
 143     starts = new int[searchables.length + 1];     // build starts array
 144     for (int i = 0; i < searchables.length; i++) {
 145       starts[i] = maxDoc;
 146       maxDoc += searchables[i].maxDoc();          // compute maxDocs
 147     }
 148     starts[searchables.length] = maxDoc;
 149   }
 150
 151   /** Return the array of {@link Searchable}s this searches. */
 152   public Searchable[] getSearchables() {
 153     return searchables;
 154   }
 155
 156   protected int[] getStarts() {
 157         return starts;
 158   }
 159
 160   // inherit javadoc
 161   @Override
 162   public void close() throws IOException {
 163     for (int i = 0; i < searchables.length; i++)
 164       searchables[i].close();
 165   }
 166
 167   @Override
 168   public int docFreq(Term term) throws IOException {
 169     int docFreq = 0;
 170     for (int i = 0; i < searchables.length; i++)
 171       docFreq += searchables[i].docFreq(term);
 172     return docFreq;
 173   }
 174
 175   // inherit javadoc
 176   @Override
 177   public Document doc(int n) throws CorruptIndexException, IOException {
 178     int i = subSearcher(n);                       // find searcher index
 179     return searchables[i].doc(n - starts[i]);     // dispatch to searcher
 180   }
 181
 182   // inherit javadoc
 183   @Override
 184   public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
 185     int i = subSearcher(n);                       // find searcher index
 186     return searchables[i].doc(n - starts[i], fieldSelector);      // dispatch to searcher
 187   }
 188
 189   /** Returns index of the searcher for document <code>n</code> in the array
 190    * used to construct this searcher. */
 191   public int subSearcher(int n) {                 // find searcher for doc n:
 192     return ReaderUtil.subIndex(n, starts);
 193   }
 194
 195   /** Returns the document number of document <code>n</code> within its
 196    * sub-index. */
 197   public int subDoc(int n) {
 198     return n - starts[subSearcher(n)];
 199   }
 200
 201   @Override
 202   public int maxDoc() throws IOException {
 203     return maxDoc;
 204   }
 205
 206   @Override
 207   public TopDocs search(Weight weight, Filter filter, int nDocs)
 208       throws IOException {
 209
 210     nDocs = Math.min(nDocs, maxDoc());
 211     final HitQueue hq = new HitQueue(nDocs, false);
 212     int totalHits = 0;
 213
 214     for (int i = 0; i < searchables.length; i++) { // search each searcher
 215       final TopDocs docs = new MultiSearcherCallableNoSort(DummyConcurrentLock.INSTANCE,
 216         searchables[i], weight, filter, nDocs, hq, i, starts).call();
 217       totalHits += docs.totalHits; // update totalHits
 218     }
 219
 220     final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
 221     for (int i = hq.size()-1; i >= 0; i--)        // put docs in array
 222       scoreDocs[i] = hq.pop();
 223
 224     float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
 225
 226     return new TopDocs(totalHits, scoreDocs, maxScore);
 227   }
 228
 229   @Override
 230   public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort) throws IOException {
 231     n = Math.min(n, maxDoc());
 232     FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(n);
 233     int totalHits = 0;
 234
 235     float maxScore=Float.NEGATIVE_INFINITY;
 236
 237     for (int i = 0; i < searchables.length; i++) { // search each searcher
 238       final TopFieldDocs docs = new MultiSearcherCallableWithSort(DummyConcurrentLock.INSTANCE,
 239         searchables[i], weight, filter, n, hq, sort, i, starts).call();
 240       totalHits += docs.totalHits; // update totalHits
 241       maxScore = Math.max(maxScore, docs.getMaxScore());
 242     }
 243
 244     final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
 245     for (int i = hq.size() - 1; i >= 0; i--)      // put docs in array
 246       scoreDocs[i] =  hq.pop();
 247
 248     return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
 249   }
 250
 251   // inherit javadoc
 252   @Override
 253   public void search(Weight weight, Filter filter, final Collector collector)
 254   throws IOException {
 255     for (int i = 0; i < searchables.length; i++) {
 256
 257       final int start = starts[i];
 258
 259       final Collector hc = new Collector() {
 260         @Override
 261         public void setScorer(Scorer scorer) throws IOException {
 262           collector.setScorer(scorer);
 263         }
 264         @Override
 265         public void collect(int doc) throws IOException {
 266           collector.collect(doc);
 267         }
 268         @Override
 269         public void setNextReader(IndexReader reader, int docBase) throws IOException {
 270           collector.setNextReader(reader, start + docBase);
 271         }
 272         @Override
 273         public boolean acceptsDocsOutOfOrder() {
 274           return collector.acceptsDocsOutOfOrder();
 275         }
 276       };
 277
 278       searchables[i].search(weight, filter, hc);
 279     }
 280   }
 281
 282   @Override
 283   public Query rewrite(Query original) throws IOException {
 284     final Query[] queries = new Query[searchables.length];
 285     for (int i = 0; i < searchables.length; i++) {
 286       queries[i] = searchables[i].rewrite(original);
 287     }
 288     return queries[0].combine(queries);
 289   }
 290
 291   @Override
 292   public Explanation explain(Weight weight, int doc) throws IOException {
 293     final int i = subSearcher(doc);                       // find searcher index
 294     return searchables[i].explain(weight, doc - starts[i]); // dispatch to searcher
 295   }
 296
 297   /**
 298    * Create weight in multiple index scenario.
 299    *
 300    * Distributed query processing is done in the following steps:
 301    * 1. rewrite query
 302    * 2. extract necessary terms
 303    * 3. collect dfs for these terms from the Searchables
 304    * 4. create query weight using aggregate dfs.
 305    * 5. distribute that weight to Searchables
 306    * 6. merge results
 307    *
 308    * Steps 1-4 are done here, 5+6 in the search() methods
 309    *
 310    * @return rewritten queries
 311    */
 312   @Override
 313   public Weight createNormalizedWeight(Query original) throws IOException {
 314     // step 1
 315     final Query rewrittenQuery = rewrite(original);
 316
 317     // step 2
 318     final Set<Term> terms = new HashSet<Term>();
 319     rewrittenQuery.extractTerms(terms);
 320
 321     // step3
 322     final Map<Term,Integer> dfMap = createDocFrequencyMap(terms);
 323
 324     // step4
 325     final int numDocs = maxDoc();
 326     final CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
 327
 328     return cacheSim.createNormalizedWeight(rewrittenQuery);
 329   }
 330   /**
 331    * Collects the document frequency for the given terms form all searchables
 332    * @param terms term set used to collect the document frequency form all
 333    *        searchables
 334    * @return a map with a term as the key and the terms aggregated document
 335    *         frequency as a value
 336    * @throws IOException if a searchable throws an {@link IOException}
 337    */
 338    Map<Term, Integer> createDocFrequencyMap(final Set<Term> terms) throws IOException  {
 339     final Term[] allTermsArray = terms.toArray(new Term[terms.size()]);
 340     final int[] aggregatedDfs = new int[allTermsArray.length];
 341     for (Searchable searchable : searchables) {
 342       final int[] dfs = searchable.docFreqs(allTermsArray);
 343       for(int j=0; j<aggregatedDfs.length; j++){
 344         aggregatedDfs[j] += dfs[j];
 345       }
 346     }
 347     final HashMap<Term,Integer> dfMap = new HashMap<Term,Integer>();
 348     for(int i=0; i<allTermsArray.length; i++) {
 349       dfMap.put(allTermsArray[i], Integer.valueOf(aggregatedDfs[i]));
 350     }
 351     return dfMap;
 352   }
 353
 354   /**
 355    * A thread subclass for searching a single searchable
 356    */
 357   static final class MultiSearcherCallableNoSort implements Callable<TopDocs> {
 358
 359     private final Lock lock;
 360     private final Searchable searchable;
 361     private final Weight weight;
 362     private final Filter filter;
 363     private final int nDocs;
 364     private final int i;
 365     private final HitQueue hq;
 366     private final int[] starts;
 367
 368     public MultiSearcherCallableNoSort(Lock lock, Searchable searchable, Weight weight,
 369         Filter filter, int nDocs, HitQueue hq, int i, int[] starts) {
 370       this.lock = lock;
 371       this.searchable = searchable;
 372       this.weight = weight;
 373       this.filter = filter;
 374       this.nDocs = nDocs;
 375       this.hq = hq;
 376       this.i = i;
 377       this.starts = starts;
 378     }
 379
 380     public TopDocs call() throws IOException {
 381       final TopDocs docs = searchable.search (weight, filter, nDocs);
 382       final ScoreDoc[] scoreDocs = docs.scoreDocs;
 383       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
 384         final ScoreDoc scoreDoc = scoreDocs[j];
 385         scoreDoc.doc += starts[i]; // convert doc
 386         //it would be so nice if we had a thread-safe insert
 387         lock.lock();
 388         try {
 389           if (scoreDoc == hq.insertWithOverflow(scoreDoc))
 390             break;
 391         } finally {
 392           lock.unlock();
 393         }
 394       }
 395       return docs;
 396     }
 397   }
 398
 399   /**
 400    * A thread subclass for searching a single searchable
 401    */
 402   static final class MultiSearcherCallableWithSort implements Callable<TopFieldDocs> {
 403
 404     private final Lock lock;
 405     private final Searchable searchable;
 406     private final Weight weight;
 407     private final Filter filter;
 408     private final int nDocs;
 409     private final int i;
 410     private final FieldDocSortedHitQueue hq;
 411     private final int[] starts;
 412     private final Sort sort;
 413
 414     public MultiSearcherCallableWithSort(Lock lock, Searchable searchable, Weight weight,
 415         Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int i, int[] starts) {
 416       this.lock = lock;
 417       this.searchable = searchable;
 418       this.weight = weight;
 419       this.filter = filter;
 420       this.nDocs = nDocs;
 421       this.hq = hq;
 422       this.i = i;
 423       this.starts = starts;
 424       this.sort = sort;
 425     }
 426
 427     public TopFieldDocs call() throws IOException {
 428       final TopFieldDocs docs = searchable.search (weight, filter, nDocs, sort);
 429       // If one of the Sort fields is FIELD_DOC, need to fix its values, so that
 430       // it will break ties by doc Id properly. Otherwise, it will compare to
 431       // 'relative' doc Ids, that belong to two different searchables.
 432       for (int j = 0; j < docs.fields.length; j++) {
 433         if (docs.fields[j].getType() == SortField.DOC) {
 434           // iterate over the score docs and change their fields value
 435           for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) {
 436             FieldDoc fd = (FieldDoc) docs.scoreDocs[j2];
 437             fd.fields[j] = Integer.valueOf(((Integer) fd.fields[j]).intValue() + starts[i]);
 438           }
 439           break;
 440         }
 441       }
 442
 443       lock.lock();
 444       try {
 445         hq.setFields(docs.fields);
 446       } finally {
 447         lock.unlock();
 448       }
 449
 450       final ScoreDoc[] scoreDocs = docs.scoreDocs;
 451       for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
 452         final FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
 453         fieldDoc.doc += starts[i]; // convert doc
 454         //it would be so nice if we had a thread-safe insert
 455         lock.lock();
 456         try {
 457           if (fieldDoc == hq.insertWithOverflow(fieldDoc))
 458             break;
 459         } finally {
 460           lock.unlock();
 461         }
 462       }
 463       return docs;
 464     }
 465   }
 466
 467 }