lucene-java-3.4.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java

   1 package org.apache.lucene.benchmark.byTask.tasks;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.Collection;
  22 import java.util.HashSet;
  23
  24 import java.util.List;
  25 import java.util.Set;
  26
  27 import org.apache.lucene.analysis.Analyzer;
  28 import org.apache.lucene.benchmark.byTask.PerfRunData;
  29 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
  30 import org.apache.lucene.document.Document;
  31 import org.apache.lucene.document.Fieldable;
  32 import org.apache.lucene.index.IndexReader;
  33 import org.apache.lucene.search.Collector;
  34 import org.apache.lucene.search.TopDocs;
  35 import org.apache.lucene.search.MultiTermQuery;
  36 import org.apache.lucene.search.TopFieldCollector;
  37 import org.apache.lucene.search.ScoreDoc;
  38 import org.apache.lucene.search.TopScoreDocCollector;
  39 import org.apache.lucene.search.Weight;
  40 import org.apache.lucene.search.IndexSearcher;
  41 import org.apache.lucene.search.Query;
  42 import org.apache.lucene.search.Sort;
  43 import org.apache.lucene.store.Directory;
  44
  45
  46 /**
  47  * Read index (abstract) task.
  48  * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
  49  * methods to configure the actual action.
  50  * <p/>
  51  * <p>Note: All ReadTasks reuse the reader if it is already open.
  52  * Otherwise a reader is opened at start and closed at the end.
  53  * <p>
  54  * The <code>search.num.hits</code> config parameter sets
  55  * the top number of hits to collect during searching.  If
  56  * <code>print.hits.field</code> is set, then each hit is
  57  * printed along with the value of that field.</p>
  58  *
  59  * <p>Other side effects: none.
  60  */
  61 public abstract class ReadTask extends PerfTask {
  62
  63   private final QueryMaker queryMaker;
  64
  65   public ReadTask(PerfRunData runData) {
  66     super(runData);
  67     if (withSearch()) {
  68       queryMaker = getQueryMaker();
  69     } else {
  70       queryMaker = null;
  71     }
  72   }
  73   @Override
  74   public int doLogic() throws Exception {
  75     int res = 0;
  76
  77     // open reader or use existing one
  78     IndexSearcher searcher = getRunData().getIndexSearcher();
  79
  80     IndexReader reader;
  81
  82     final boolean closeSearcher;
  83     if (searcher == null) {
  84       // open our own reader
  85       Directory dir = getRunData().getDirectory();
  86       reader = IndexReader.open(dir, true);
  87       searcher = new IndexSearcher(reader);
  88       closeSearcher = true;
  89     } else {
  90       // use existing one; this passes +1 ref to us
  91       reader = searcher.getIndexReader();
  92       closeSearcher = false;
  93     }
  94
  95     // optionally warm and add num docs traversed to count
  96     if (withWarm()) {
  97       Document doc = null;
  98       for (int m = 0; m < reader.maxDoc(); m++) {
  99         if (!reader.isDeleted(m)) {
 100           doc = reader.document(m);
 101           res += (doc == null ? 0 : 1);
 102         }
 103       }
 104     }
 105
 106     if (withSearch()) {
 107       res++;
 108       Query q = queryMaker.makeQuery();
 109       Sort sort = getSort();
 110       TopDocs hits = null;
 111       final int numHits = numHits();
 112       if (numHits > 0) {
 113         if (withCollector() == false) {
 114           if (sort != null) {
 115             Weight w = searcher.createNormalizedWeight(q);
 116             TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
 117                                                                    true, withScore(),
 118                                                                    withMaxScore(),
 119                                                                    !w.scoresDocsOutOfOrder());
 120             searcher.search(w, null, collector);
 121             hits = collector.topDocs();
 122           } else {
 123             hits = searcher.search(q, numHits);
 124           }
 125         } else {
 126           Collector collector = createCollector();
 127           searcher.search(q, null, collector);
 128           //hits = collector.topDocs();
 129         }
 130
 131         final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
 132         if (hits != null && printHitsField != null && printHitsField.length() > 0) {
 133           if (q instanceof MultiTermQuery) {
 134             System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
 135           }
 136           System.out.println("totalHits = " + hits.totalHits);
 137           System.out.println("maxDoc()  = " + reader.maxDoc());
 138           System.out.println("numDocs() = " + reader.numDocs());
 139           for(int i=0;i<hits.scoreDocs.length;i++) {
 140             final int docID = hits.scoreDocs[i].doc;
 141             final Document doc = reader.document(docID);
 142             System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
 143           }
 144         }
 145
 146         if (withTraverse()) {
 147           final ScoreDoc[] scoreDocs = hits.scoreDocs;
 148           int traversalSize = Math.min(scoreDocs.length, traversalSize());
 149
 150           if (traversalSize > 0) {
 151             boolean retrieve = withRetrieve();
 152             int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
 153             Analyzer analyzer = getRunData().getAnalyzer();
 154             BenchmarkHighlighter highlighter = null;
 155             if (numHighlight > 0) {
 156               highlighter = getBenchmarkHighlighter(q);
 157             }
 158             for (int m = 0; m < traversalSize; m++) {
 159               int id = scoreDocs[m].doc;
 160               res++;
 161               if (retrieve) {
 162                 Document document = retrieveDoc(reader, id);
 163                 res += document != null ? 1 : 0;
 164                 if (numHighlight > 0 && m < numHighlight) {
 165                   Collection<String> fieldsToHighlight = getFieldsToHighlight(document);
 166                   for (final String field : fieldsToHighlight) {
 167                     String text = document.get(field);
 168                     res += highlighter.doHighlight(reader, id, field, document, analyzer, text);
 169                   }
 170                 }
 171               }
 172             }
 173           }
 174         }
 175       }
 176     }
 177
 178     if (closeSearcher) {
 179       searcher.close();
 180       reader.close();
 181     } else {
 182       // Release our +1 ref from above
 183       reader.decRef();
 184     }
 185     return res;
 186   }
 187
 188   protected Collector createCollector() throws Exception {
 189     return TopScoreDocCollector.create(numHits(), true);
 190   }
 191
 192
 193   protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
 194     return ir.document(id);
 195   }
 196
 197   /**
 198    * Return query maker used for this task.
 199    */
 200   public abstract QueryMaker getQueryMaker();
 201
 202   /**
 203    * Return true if search should be performed.
 204    */
 205   public abstract boolean withSearch();
 206
 207   public boolean withCollector(){
 208     return false;
 209   }
 210
 211
 212   /**
 213    * Return true if warming should be performed.
 214    */
 215   public abstract boolean withWarm();
 216
 217   /**
 218    * Return true if, with search, results should be traversed.
 219    */
 220   public abstract boolean withTraverse();
 221
 222   /** Whether scores should be computed (only useful with
 223    *  field sort) */
 224   public boolean withScore() {
 225     return true;
 226   }
 227
 228   /** Whether maxScores should be computed (only useful with
 229    *  field sort) */
 230   public boolean withMaxScore() {
 231     return true;
 232   }
 233
 234   /**
 235    * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
 236    * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
 237    * <p/>
 238    * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
 239    *
 240    * @return Integer.MAX_VALUE
 241    */
 242   public int traversalSize() {
 243     return Integer.MAX_VALUE;
 244   }
 245
 246   static final int DEFAULT_SEARCH_NUM_HITS = 10;
 247   private int numHits;
 248
 249   @Override
 250   public void setup() throws Exception {
 251     super.setup();
 252     numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
 253   }
 254
 255   /**
 256    * Specify the number of hits to retrieve.  Tasks should override this if they want to restrict the number
 257    * of hits that are collected during searching. Must be greater than 0.
 258    *
 259    * @return 10 by default, or search.num.hits config if set.
 260    */
 261   public int numHits() {
 262     return numHits;
 263   }
 264
 265   /**
 266    * Return true if, with search & results traversing, docs should be retrieved.
 267    */
 268   public abstract boolean withRetrieve();
 269
 270   /**
 271    * Set to the number of documents to highlight.
 272    *
 273    * @return The number of the results to highlight.  O means no docs will be highlighted.
 274    */
 275   public int numToHighlight() {
 276     return 0;
 277   }
 278
 279   /**
 280    * Return an appropriate highlighter to be used with
 281    * highlighting tasks
 282    */
 283   protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
 284     return null;
 285   }
 286
 287   protected Sort getSort() {
 288     return null;
 289   }
 290
 291   /**
 292    * Define the fields to highlight.  Base implementation returns all fields
 293    * @param document The Document
 294    * @return A Collection of Field names (Strings)
 295    */
 296   protected Collection<String> getFieldsToHighlight(Document document) {
 297     List<Fieldable> fieldables = document.getFields();
 298     Set<String> result = new HashSet<String>(fieldables.size());
 299     for (final Fieldable fieldable : fieldables) {
 300       result.add(fieldable.name());
 301     }
 302     return result;
 303   }
 304
 305 }