lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.ArrayList;
  22 import java.util.List;
  23
  24 import org.apache.lucene.document.Document;
  25 import org.apache.lucene.document.Field;
  26 import org.apache.lucene.document.NumericField;
  27 import org.apache.lucene.index.IndexReader;
  28 import org.apache.lucene.index.RandomIndexWriter;
  29 import org.apache.lucene.index.Term;
  30 import org.apache.lucene.store.Directory;
  31 import org.apache.lucene.util.LuceneTestCase;
  32 import org.apache.lucene.util.ReaderUtil;
  33 import org.apache.lucene.util._TestUtil;
  34
  35 public class TestTopDocsMerge extends LuceneTestCase {
  36
  37   private static class ShardSearcher {
  38     private final IndexSearcher subSearcher;
  39
  40     public ShardSearcher(IndexReader subReader) {
  41       this.subSearcher = new IndexSearcher(subReader);
  42     }
  43
  44     public void search(Weight weight, Collector collector) throws IOException {
  45       subSearcher.search(weight, null, collector);
  46     }
  47
  48     public TopDocs search(Weight weight, int topN) throws IOException {
  49       return subSearcher.search(weight, null, topN);
  50     }
  51
  52     @Override
  53     public String toString() {
  54       return "ShardSearcher(" + subSearcher + ")";
  55     }
  56   }
  57
  58   public void testSort() throws Exception {
  59
  60     IndexReader reader = null;
  61     Directory dir = null;
  62
  63     final int numDocs = atLeast(1000);
  64     //final int numDocs = atLeast(50);
  65
  66     final String[] tokens = new String[] {"a", "b", "c", "d", "e"};
  67
  68     if (VERBOSE) {
  69       System.out.println("TEST: make index");
  70     }
  71
  72     {
  73       dir = newDirectory();
  74       final RandomIndexWriter w = new RandomIndexWriter(random, dir);
  75       // w.setDoRandomForceMerge(false);
  76
  77       // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
  78
  79       final String[] content = new String[atLeast(20)];
  80
  81       for(int contentIDX=0;contentIDX<content.length;contentIDX++) {
  82         final StringBuilder sb = new StringBuilder();
  83         final int numTokens = _TestUtil.nextInt(random, 1, 10);
  84         for(int tokenIDX=0;tokenIDX<numTokens;tokenIDX++) {
  85           sb.append(tokens[random.nextInt(tokens.length)]).append(' ');
  86         }
  87         content[contentIDX] = sb.toString();
  88       }
  89
  90       for(int docIDX=0;docIDX<numDocs;docIDX++) {
  91         final Document doc = new Document();
  92         doc.add(newField("string", _TestUtil.randomRealisticUnicodeString(random), Field.Index.NOT_ANALYZED));
  93         doc.add(newField("text", content[random.nextInt(content.length)], Field.Index.ANALYZED));
  94         doc.add(new NumericField("float").setFloatValue(random.nextFloat()));
  95         final int intValue;
  96         if (random.nextInt(100) == 17) {
  97           intValue = Integer.MIN_VALUE;
  98         } else if (random.nextInt(100) == 17) {
  99           intValue = Integer.MAX_VALUE;
 100         } else {
 101           intValue = random.nextInt();
 102         }
 103         doc.add(new NumericField("int").setIntValue(intValue));
 104         if (VERBOSE) {
 105           System.out.println("  doc=" + doc);
 106         }
 107         w.addDocument(doc);
 108       }
 109
 110       reader = w.getReader();
 111       w.close();
 112     }
 113
 114     // NOTE: sometimes reader has just one segment, which is
 115     // important to test
 116     final IndexSearcher searcher = newSearcher(reader);
 117     IndexReader[] subReaders = searcher.getIndexReader().getSequentialSubReaders();
 118     if (subReaders == null) {
 119       subReaders = new IndexReader[] {searcher.getIndexReader()};
 120     }
 121     final ShardSearcher[] subSearchers = new ShardSearcher[subReaders.length];
 122
 123     for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
 124       subSearchers[searcherIDX] = new ShardSearcher(subReaders[searcherIDX]);
 125     }
 126
 127     final List<SortField> sortFields = new ArrayList<SortField>();
 128     sortFields.add(new SortField("string", SortField.STRING, true));
 129     sortFields.add(new SortField("string", SortField.STRING, false));
 130     sortFields.add(new SortField("int", SortField.INT, true));
 131     sortFields.add(new SortField("int", SortField.INT, false));
 132     sortFields.add(new SortField("float", SortField.FLOAT, true));
 133     sortFields.add(new SortField("float", SortField.FLOAT, false));
 134     sortFields.add(new SortField(null, SortField.SCORE, true));
 135     sortFields.add(new SortField(null, SortField.SCORE, false));
 136     sortFields.add(new SortField(null, SortField.DOC, true));
 137     sortFields.add(new SortField(null, SortField.DOC, false));
 138
 139     final int[] docStarts = new int[subSearchers.length];
 140     int docBase = 0;
 141     for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
 142       docStarts[subIDX] = docBase;
 143       docBase += subReaders[subIDX].maxDoc();
 144       if (VERBOSE) {
 145         System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
 146       }
 147     }
 148
 149     for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
 150
 151       // TODO: custom FieldComp...
 152       final Query query = new TermQuery(new Term("text", tokens[random.nextInt(tokens.length)]));
 153
 154       final Sort sort;
 155       if (random.nextInt(10) == 4) {
 156         // Sort by score
 157         sort = null;
 158       } else {
 159         final SortField[] randomSortFields = new SortField[_TestUtil.nextInt(random, 1, 3)];
 160         for(int sortIDX=0;sortIDX<randomSortFields.length;sortIDX++) {
 161           randomSortFields[sortIDX] = sortFields.get(random.nextInt(sortFields.size()));
 162         }
 163         sort = new Sort(randomSortFields);
 164       }
 165
 166       final int numHits = _TestUtil.nextInt(random, 1, numDocs+5);
 167       //final int numHits = 5;
 168
 169       if (VERBOSE) {
 170         System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
 171       }
 172
 173       // First search on whole index:
 174       final TopDocs topHits;
 175       if (sort == null) {
 176         topHits = searcher.search(query, numHits);
 177       } else {
 178         final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
 179         searcher.search(query, c);
 180         topHits = c.topDocs(0, numHits);
 181       }
 182
 183       if (VERBOSE) {
 184         System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
 185         if (topHits.scoreDocs != null) {
 186           for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
 187             final ScoreDoc sd = topHits.scoreDocs[hitIDX];
 188             System.out.println("    doc=" + sd.doc + " score=" + sd.score);
 189           }
 190         }
 191       }
 192
 193       // ... then all shards:
 194       final Weight w = searcher.createNormalizedWeight(query);
 195
 196       final TopDocs[] shardHits = new TopDocs[subSearchers.length];
 197       for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
 198         final TopDocs subHits;
 199         final ShardSearcher subSearcher = subSearchers[shardIDX];
 200         if (sort == null) {
 201           subHits = subSearcher.search(w, numHits);
 202         } else {
 203           final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
 204           subSearcher.search(w, c);
 205           subHits = c.topDocs(0, numHits);
 206         }
 207         rebaseDocIDs(docStarts[shardIDX], subHits);
 208
 209         shardHits[shardIDX] = subHits;
 210         if (VERBOSE) {
 211           System.out.println("  shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
 212           if (subHits.scoreDocs != null) {
 213             for(ScoreDoc sd : subHits.scoreDocs) {
 214               System.out.println("    doc=" + sd.doc + " score=" + sd.score);
 215             }
 216           }
 217         }
 218       }
 219
 220       // Merge:
 221       final TopDocs mergedHits = TopDocs.merge(sort, numHits, shardHits);
 222
 223       if (VERBOSE) {
 224         System.out.println("  mergedHits: " + mergedHits.totalHits + " totalHits; hits=" + (mergedHits.scoreDocs == null ? "null" : mergedHits.scoreDocs.length));
 225         if (mergedHits.scoreDocs != null) {
 226           for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
 227             final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
 228             System.out.println("    doc=" + sd.doc + " score=" + sd.score);
 229           }
 230         }
 231       }
 232       if (mergedHits.scoreDocs != null) {
 233         // Make sure the returned shards are correct:
 234         for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
 235           final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
 236           assertEquals("doc=" + sd.doc + " wrong shard",
 237                        ReaderUtil.subIndex(sd.doc, docStarts),
 238                        sd.shardIndex);
 239         }
 240       }
 241
 242       _TestUtil.assertEquals(topHits, mergedHits);
 243     }
 244     searcher.close();
 245     reader.close();
 246     dir.close();
 247   }
 248
 249   private void rebaseDocIDs(int docBase, TopDocs hits) {
 250     List<Integer> docFieldLocs = new ArrayList<Integer>();
 251     if (hits instanceof TopFieldDocs) {
 252       TopFieldDocs fieldHits = (TopFieldDocs) hits;
 253       for(int fieldIDX=0;fieldIDX<fieldHits.fields.length;fieldIDX++) {
 254         if (fieldHits.fields[fieldIDX].getType() == SortField.DOC) {
 255           docFieldLocs.add(fieldIDX);
 256         }
 257       }
 258     }
 259
 260     for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
 261       final ScoreDoc sd = hits.scoreDocs[hitIDX];
 262       sd.doc += docBase;
 263       if (sd instanceof FieldDoc) {
 264         final FieldDoc fd = (FieldDoc) sd;
 265         if (fd.fields != null) {
 266           for(int idx : docFieldLocs) {
 267             fd.fields[idx] = Integer.valueOf(((Integer) fd.fields[idx]).intValue() + docBase);
 268           }
 269         }
 270       }
 271     }
 272   }
 273 }