1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.document.Document;
21 import org.apache.lucene.document.FieldSelector;
22 import org.apache.lucene.index.CorruptIndexException;
23 import org.apache.lucene.index.Term;
24 import org.apache.lucene.index.IndexReader;
25 import org.apache.lucene.util.ReaderUtil;
26 import org.apache.lucene.util.DummyConcurrentLock;
28 import java.io.IOException;
29 import java.util.HashMap;
30 import java.util.HashSet;
33 import java.util.concurrent.Callable;
34 import java.util.concurrent.locks.Lock;
/** Implements search over a set of <code>Searchables</code>.
 *
 * <p>Applications usually need only call the inherited {@link #search(Query,int)}
 * or {@link #search(Query,Filter,int)} methods.
 *
 * @deprecated If you are using MultiSearcher over
 * IndexSearchers, please use MultiReader instead; this class
 * does not properly handle certain kinds of queries (see <a
 * href="https://issues.apache.org/jira/browse/LUCENE-2756">LUCENE-2756</a>).
 */
public class MultiSearcher extends Searcher {
  /**
   * Document Frequency cache acting as a Dummy-Searcher. This class is no
   * full-fledged Searcher, but only supports the methods necessary to
   * hand aggregated document frequencies to weight creation; every other
   * Searcher operation throws {@link UnsupportedOperationException}.
   *
   * NOTE(review): several structural lines (a {@code try} opener, some
   * {@code return} statements and closing braces) are not visible in this
   * excerpt; code is left byte-identical to what is shown.
   */
  private static class CachedDfSource extends Searcher {
    private final Map<Term,Integer> dfMap; // Map from Terms to corresponding doc freqs
    private final int maxDoc; // document count

    // Similarity is forwarded so normalization matches the owning MultiSearcher.
    public CachedDfSource(Map<Term,Integer> dfMap, int maxDoc, Similarity similarity) {
      setSimilarity(similarity);

    // Returns the cached (aggregated) document frequency for the term.
    // A term missing from the cache means the query differs from the one used
    // to build the cache; the NPE is translated to IllegalArgumentException.
    public int docFreq(Term term) {
      df = dfMap.get(term).intValue();
      } catch (NullPointerException e) {
        throw new IllegalArgumentException("df for term " + term.text()

    // Bulk variant: one cached-df lookup per term, in input order.
    public int[] docFreqs(Term[] terms) {
      final int[] result = new int[terms.length];
      for (int i = 0; i < terms.length; i++) {
        result[i] = docFreq(terms[i]);

    public Query rewrite(Query query) {
      // this is a bit of a hack. We know that a query which
      // creates a Weight based on this Dummy-Searcher is
      // always already rewritten (see preparedWeight()).
      // Therefore we just return the unmodified query here

    // The methods below are not supported by this dummy searcher.
    public void close() {
      throw new UnsupportedOperationException();

    public Document doc(int i) {
      throw new UnsupportedOperationException();

    public Document doc(int i, FieldSelector fieldSelector) {
      throw new UnsupportedOperationException();

    public Explanation explain(Weight weight,int doc) {
      throw new UnsupportedOperationException();

    public void search(Weight weight, Filter filter, Collector results) {
      throw new UnsupportedOperationException();

    public TopDocs search(Weight weight,Filter filter,int n) {
      throw new UnsupportedOperationException();

    public TopFieldDocs search(Weight weight,Filter filter,int n,Sort sort) {
      throw new UnsupportedOperationException();
  private Searchable[] searchables; // sub-searchers, in construction order
  private int[] starts;             // starts[i] = doc-id offset of searchables[i]; length is searchables.length + 1
  private int maxDoc = 0;           // sum of all sub-searchers' maxDoc(), computed in the constructor
  /** Creates a searcher which searches <i>searchers</i>. */
  public MultiSearcher(Searchable... searchables) throws IOException {
    this.searchables = searchables;

    // Build the starts array: cumulative maxDoc offsets mapping composite
    // doc ids to sub-searchers; the final slot holds the total doc count.
    // NOTE(review): the per-iteration starts[i] assignment is not visible
    // in this excerpt — confirm against the full source.
    starts = new int[searchables.length + 1]; // build starts array
    for (int i = 0; i < searchables.length; i++) {
      maxDoc += searchables[i].maxDoc(); // compute maxDocs
    starts[searchables.length] = maxDoc;
  /** Return the array of {@link Searchable}s this searches. */
  public Searchable[] getSearchables() {

  // Exposes the doc-id offset table to subclasses (see subSearcher/subDoc).
  protected int[] getStarts() {
  /**
   * Closes every underlying {@link Searchable}.
   * An IOException from one sub-searcher propagates immediately, leaving
   * the remaining searchables unclosed.
   */
  public void close() throws IOException {
    for (int i = 0; i < searchables.length; i++)
      searchables[i].close();
  /**
   * Returns the document frequency of {@code term} summed over all
   * sub-searchers.
   * NOTE(review): the accumulator's declaration/initialization line is not
   * visible in this excerpt.
   */
  public int docFreq(Term term) throws IOException {
    for (int i = 0; i < searchables.length; i++)
      docFreq += searchables[i].docFreq(term);
  /** Retrieves document {@code n}, translating the composite doc id into the
   * owning sub-searcher's local doc-id space. */
  public Document doc(int n) throws CorruptIndexException, IOException {
    int i = subSearcher(n); // find searcher index
    return searchables[i].doc(n - starts[i]); // dispatch to searcher
  /** Same as {@link #doc(int)} but loads only the fields selected by
   * {@code fieldSelector}. */
  public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    int i = subSearcher(n); // find searcher index
    return searchables[i].doc(n - starts[i], fieldSelector); // dispatch to searcher
  /** Returns index of the searcher for document <code>n</code> in the array
   * used to construct this searcher. */
  public int subSearcher(int n) { // find searcher for doc n:
    // binary search over the cumulative offsets table
    return ReaderUtil.subIndex(n, starts);
  /** Returns the document number of document <code>n</code> within its
   * sub-searcher (i.e. relative to that searcher's own doc-id space). */
  public int subDoc(int n) {
    return n - starts[subSearcher(n)];
202 public int maxDoc() throws IOException {
  /**
   * Searches each sub-searcher in turn (sequentially, via the callable helper
   * with a no-op lock) and merges all hits into one shared HitQueue, then
   * drains the queue into a score-descending array.
   */
  public TopDocs search(Weight weight, Filter filter, int nDocs)
    nDocs = Math.min(nDocs, maxDoc());
    final HitQueue hq = new HitQueue(nDocs, false);

    for (int i = 0; i < searchables.length; i++) { // search each searcher
      // DummyConcurrentLock: no real locking needed in this single-threaded path
      final TopDocs docs = new MultiSearcherCallableNoSort(DummyConcurrentLock.INSTANCE,
          searchables[i], weight, filter, nDocs, hq, i, starts).call();
      totalHits += docs.totalHits; // update totalHits

    // pop in reverse so the array ends up ordered best-first
    final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
    for (int i = hq.size()-1; i >= 0; i--) // put docs in array
      scoreDocs[i] = hq.pop();

    // scoreDocs[0] is the best hit; with no hits there is no defined max score
    float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;

    return new TopDocs(totalHits, scoreDocs, maxScore);
  /**
   * Sorted variant: searches each sub-searcher sequentially and merges hits
   * into a shared FieldDocSortedHitQueue, tracking the max score across all
   * sub-results.
   */
  public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort) throws IOException {
    n = Math.min(n, maxDoc());
    FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(n);
    float maxScore=Float.NEGATIVE_INFINITY;

    for (int i = 0; i < searchables.length; i++) { // search each searcher
      // DummyConcurrentLock: no real locking needed in this single-threaded path
      final TopFieldDocs docs = new MultiSearcherCallableWithSort(DummyConcurrentLock.INSTANCE,
          searchables[i], weight, filter, n, hq, sort, i, starts).call();
      totalHits += docs.totalHits; // update totalHits
      maxScore = Math.max(maxScore, docs.getMaxScore());

    // drain queue in reverse so the array is ordered best-first per the Sort
    final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
    for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
      scoreDocs[i] = hq.pop();

    return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
  /**
   * Collector-based search: runs the query against each sub-searcher,
   * wrapping the caller's collector so that setNextReader receives a docBase
   * shifted by this sub-searcher's starting offset. All other calls are
   * forwarded unchanged.
   */
  public void search(Weight weight, Filter filter, final Collector collector)
    for (int i = 0; i < searchables.length; i++) {

      final int start = starts[i]; // offset for this sub-searcher's doc ids

      final Collector hc = new Collector() {
        public void setScorer(Scorer scorer) throws IOException {
          collector.setScorer(scorer);
        public void collect(int doc) throws IOException {
          collector.collect(doc);
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
          // shift docBase into the composite doc-id space
          collector.setNextReader(reader, start + docBase);
        public boolean acceptsDocsOutOfOrder() {
          return collector.acceptsDocsOutOfOrder();

      searchables[i].search(weight, filter, hc);
  /**
   * Rewrites the query against every sub-searcher and combines the results
   * into a single query via {@link Query#combine}.
   * NOTE(review): assumes at least one searchable — queries[0] would fail on
   * an empty searchables array.
   */
  public Query rewrite(Query original) throws IOException {
    final Query[] queries = new Query[searchables.length];
    for (int i = 0; i < searchables.length; i++) {
      queries[i] = searchables[i].rewrite(original);
    return queries[0].combine(queries);
  /** Explains the score of {@code doc} by delegating to the sub-searcher
   * that owns it, with the doc id translated to that searcher's space. */
  public Explanation explain(Weight weight, int doc) throws IOException {
    final int i = subSearcher(doc); // find searcher index
    return searchables[i].explain(weight, doc - starts[i]); // dispatch to searcher
  /**
   * Create weight in multiple index scenario.
   *
   * Distributed query processing is done in the following steps:
   * 1. rewrite the query
   * 2. extract necessary terms
   * 3. collect dfs for these terms from the Searchables
   * 4. create query weight using aggregate dfs.
   * 5. distribute that weight to Searchables
   *
   * Steps 1-4 are done here, 5+6 in the search() methods
   *
   * @return rewritten queries
   */
  public Weight createNormalizedWeight(Query original) throws IOException {
    // step 1: rewrite against all sub-searchers and combine
    final Query rewrittenQuery = rewrite(original);
    // step 2: pull the terms out of the rewritten query
    final Set<Term> terms = new HashSet<Term>();
    rewrittenQuery.extractTerms(terms);
    // step 3: aggregate document frequencies across all searchables
    final Map<Term,Integer> dfMap = createDocFrequencyMap(terms);
    // step 4: build the weight against a dummy searcher that serves the
    // aggregated dfs, so idf reflects the whole composite index
    final int numDocs = maxDoc();
    final CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, getSimilarity());
    return cacheSim.createNormalizedWeight(rewrittenQuery);
  /**
   * Collects the document frequency for the given terms form all searchables
   * @param terms term set used to collect the document frequency form all
   *        searchables
   * @return a map with a term as the key and the terms aggregated document
   * frequency as a value
   * @throws IOException if a searchable throws an {@link IOException}
   */
  Map<Term, Integer> createDocFrequencyMap(final Set<Term> terms) throws IOException {
    final Term[] allTermsArray = terms.toArray(new Term[terms.size()]);
    final int[] aggregatedDfs = new int[allTermsArray.length];
    // one bulk docFreqs() call per searchable, summed position-by-position
    for (Searchable searchable : searchables) {
      final int[] dfs = searchable.docFreqs(allTermsArray);
      for(int j=0; j<aggregatedDfs.length; j++){
        aggregatedDfs[j] += dfs[j];

    // box the aggregated counts into the result map, keyed by term
    final HashMap<Term,Integer> dfMap = new HashMap<Term,Integer>();
    for(int i=0; i<allTermsArray.length; i++) {
      dfMap.put(allTermsArray[i], Integer.valueOf(aggregatedDfs[i]));
  /**
   * A thread subclass for searching a single searchable
   *
   * Runs one sub-searcher's unsorted search and merges its hits (with doc ids
   * shifted into the composite doc-id space) into the shared HitQueue.
   * NOTE(review): some constructor assignments (lock, nDocs, hq, i) and the
   * tail of call() are not visible in this excerpt.
   */
  static final class MultiSearcherCallableNoSort implements Callable<TopDocs> {

    private final Lock lock;             // guards the shared hit queue
    private final Searchable searchable; // the sub-searcher to run
    private final Weight weight;
    private final Filter filter;
    private final int nDocs;
    private final HitQueue hq;           // shared across all callables
    private final int[] starts;          // doc-id offsets per sub-searcher

    public MultiSearcherCallableNoSort(Lock lock, Searchable searchable, Weight weight,
        Filter filter, int nDocs, HitQueue hq, int i, int[] starts) {
      this.searchable = searchable;
      this.weight = weight;
      this.filter = filter;
      this.starts = starts;

    public TopDocs call() throws IOException {
      final TopDocs docs = searchable.search (weight, filter, nDocs);
      final ScoreDoc[] scoreDocs = docs.scoreDocs;
      for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
        final ScoreDoc scoreDoc = scoreDocs[j];
        scoreDoc.doc += starts[i]; // convert doc
        //it would be so nice if we had a thread-safe insert
        // insertWithOverflow returns the rejected doc; getting our own doc
        // back means it did not make the queue
        if (scoreDoc == hq.insertWithOverflow(scoreDoc))
  /**
   * A thread subclass for searching a single searchable
   *
   * Sorted counterpart of MultiSearcherCallableNoSort: merges one
   * sub-searcher's sorted hits into the shared FieldDocSortedHitQueue.
   * NOTE(review): some constructor assignments (lock, nDocs, hq, sort, i)
   * are not visible in this excerpt.
   */
  static final class MultiSearcherCallableWithSort implements Callable<TopFieldDocs> {

    private final Lock lock;                    // guards the shared hit queue
    private final Searchable searchable;        // the sub-searcher to run
    private final Weight weight;
    private final Filter filter;
    private final int nDocs;
    private final FieldDocSortedHitQueue hq;    // shared across all callables
    private final int[] starts;                 // doc-id offsets per sub-searcher
    private final Sort sort;

    public MultiSearcherCallableWithSort(Lock lock, Searchable searchable, Weight weight,
        Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int i, int[] starts) {
      this.searchable = searchable;
      this.weight = weight;
      this.filter = filter;
      this.starts = starts;
427 public TopFieldDocs call() throws IOException {
428 final TopFieldDocs docs = searchable.search (weight, filter, nDocs, sort);
429 // If one of the Sort fields is FIELD_DOC, need to fix its values, so that
430 // it will break ties by doc Id properly. Otherwise, it will compare to
431 // 'relative' doc Ids, that belong to two different searchables.
432 for (int j = 0; j < docs.fields.length; j++) {
433 if (docs.fields[j].getType() == SortField.DOC) {
434 // iterate over the score docs and change their fields value
435 for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) {
436 FieldDoc fd = (FieldDoc) docs.scoreDocs[j2];
437 fd.fields[j] = Integer.valueOf(((Integer) fd.fields[j]).intValue() + starts[i]);
445 hq.setFields(docs.fields);
450 final ScoreDoc[] scoreDocs = docs.scoreDocs;
451 for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
452 final FieldDoc fieldDoc = (FieldDoc) scoreDocs[j];
453 fieldDoc.doc += starts[i]; // convert doc
454 //it would be so nice if we had a thread-safe insert
457 if (fieldDoc == hq.insertWithOverflow(fieldDoc))