1 package org.apache.lucene.benchmark.byTask.tasks;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.HashSet;
24 import java.util.List;
27 import org.apache.lucene.analysis.Analyzer;
28 import org.apache.lucene.benchmark.byTask.PerfRunData;
29 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
30 import org.apache.lucene.document.Document;
31 import org.apache.lucene.document.Fieldable;
32 import org.apache.lucene.index.IndexReader;
33 import org.apache.lucene.search.Collector;
34 import org.apache.lucene.search.TopDocs;
35 import org.apache.lucene.search.MultiTermQuery;
36 import org.apache.lucene.search.TopFieldCollector;
37 import org.apache.lucene.search.ScoreDoc;
38 import org.apache.lucene.search.TopScoreDocCollector;
39 import org.apache.lucene.search.Weight;
40 import org.apache.lucene.search.IndexSearcher;
41 import org.apache.lucene.search.Query;
42 import org.apache.lucene.search.Sort;
43 import org.apache.lucene.store.Directory;
47 * Read index (abstract) task.
48 * Subclasses implement withSearch(), withWarm(), withTraverse() and withRetrieve()
49 * methods to configure the actual action.
51 * <p>Note: All ReadTasks reuse the reader if it is already open.
52 * Otherwise a reader is opened at start and closed at the end.
54 * The <code>search.num.hits</code> config parameter sets
55 * the top number of hits to collect during searching. If
56 * <code>print.hits.field</code> is set, then each hit is
57 * printed along with the value of that field.</p>
59 * <p>Other side effects: none.
61 public abstract class ReadTask extends PerfTask {
63 private final QueryMaker queryMaker;
65 public ReadTask(PerfRunData runData) {
68 queryMaker = getQueryMaker();
74 public int doLogic() throws Exception {
77 // open reader or use existing one
78 IndexSearcher searcher = getRunData().getIndexSearcher();
82 final boolean closeSearcher;
83 if (searcher == null) {
84 // open our own reader
85 Directory dir = getRunData().getDirectory();
86 reader = IndexReader.open(dir, true);
87 searcher = new IndexSearcher(reader);
90 // use existing one; this passes +1 ref to us
91 reader = searcher.getIndexReader();
92 closeSearcher = false;
95 // optionally warm and add num docs traversed to count
98 for (int m = 0; m < reader.maxDoc(); m++) {
99 if (!reader.isDeleted(m)) {
100 doc = reader.document(m);
101 res += (doc == null ? 0 : 1);
108 Query q = queryMaker.makeQuery();
109 Sort sort = getSort();
111 final int numHits = numHits();
113 if (withCollector() == false) {
115 Weight w = searcher.createNormalizedWeight(q);
116 TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
119 !w.scoresDocsOutOfOrder());
120 searcher.search(w, null, collector);
121 hits = collector.topDocs();
123 hits = searcher.search(q, numHits);
126 Collector collector = createCollector();
127 searcher.search(q, null, collector);
128 //hits = collector.topDocs();
131 final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
132 if (hits != null && printHitsField != null && printHitsField.length() > 0) {
133 if (q instanceof MultiTermQuery) {
134 System.out.println("MultiTermQuery term count = " + ((MultiTermQuery) q).getTotalNumberOfTerms());
136 System.out.println("totalHits = " + hits.totalHits);
137 System.out.println("maxDoc() = " + reader.maxDoc());
138 System.out.println("numDocs() = " + reader.numDocs());
139 for(int i=0;i<hits.scoreDocs.length;i++) {
140 final int docID = hits.scoreDocs[i].doc;
141 final Document doc = reader.document(docID);
142 System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
146 if (withTraverse()) {
147 final ScoreDoc[] scoreDocs = hits.scoreDocs;
148 int traversalSize = Math.min(scoreDocs.length, traversalSize());
150 if (traversalSize > 0) {
151 boolean retrieve = withRetrieve();
152 int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
153 Analyzer analyzer = getRunData().getAnalyzer();
154 BenchmarkHighlighter highlighter = null;
155 if (numHighlight > 0) {
156 highlighter = getBenchmarkHighlighter(q);
158 for (int m = 0; m < traversalSize; m++) {
159 int id = scoreDocs[m].doc;
162 Document document = retrieveDoc(reader, id);
163 res += document != null ? 1 : 0;
164 if (numHighlight > 0 && m < numHighlight) {
165 Collection<String> fieldsToHighlight = getFieldsToHighlight(document);
166 for (final String field : fieldsToHighlight) {
167 String text = document.get(field);
168 res += highlighter.doHighlight(reader, id, field, document, analyzer, text);
182 // Release our +1 ref from above
188 protected Collector createCollector() throws Exception {
189 return TopScoreDocCollector.create(numHits(), true);
193 protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
194 return ir.document(id);
198 * Return query maker used for this task.
200 public abstract QueryMaker getQueryMaker();
203 * Return true if search should be performed.
205 public abstract boolean withSearch();
207 public boolean withCollector(){
213 * Return true if warming should be performed.
215 public abstract boolean withWarm();
218 * Return true if, with search, results should be traversed.
220 public abstract boolean withTraverse();
222 /** Whether scores should be computed (only useful with
224 public boolean withScore() {
228 /** Whether maxScores should be computed (only useful with
230 public boolean withMaxScore() {
235 * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
236 * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
238 * Read task calculates the traversal as: Math.min(scoreDocs.length, traversalSize())
240 * @return Integer.MAX_VALUE
242 public int traversalSize() {
243 return Integer.MAX_VALUE;
246 static final int DEFAULT_SEARCH_NUM_HITS = 10;
250 public void setup() throws Exception {
252 numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
256 * Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
257 * of hits that are collected during searching. Must be greater than 0.
259 * @return 10 by default, or search.num.hits config if set.
261 public int numHits() {
266 * Return true if, with search and results traversal, docs should be retrieved.
268 public abstract boolean withRetrieve();
271 * Set to the number of documents to highlight.
273 * @return The number of the results to highlight. 0 means no docs will be highlighted.
275 public int numToHighlight() {
280 * Return an appropriate highlighter to be used with
283 protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){
287 protected Sort getSort() {
292 * Define the fields to highlight. Base implementation returns all fields.
293 * @param document The Document
294 * @return A Collection of Field names (Strings)
296 protected Collection<String> getFieldsToHighlight(Document document) {
297 List<Fieldable> fieldables = document.getFields();
298 Set<String> result = new HashSet<String>(fieldables.size());
299 for (final Fieldable fieldable : fieldables) {
300 result.add(fieldable.name());