/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.apache.lucene.benchmark.quality;
19 import java.io.IOException;
20 import java.io.PrintWriter;
22 import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
23 import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
24 import org.apache.lucene.search.Query;
25 import org.apache.lucene.search.ScoreDoc;
26 import org.apache.lucene.search.Searcher;
27 import org.apache.lucene.search.TopDocs;
30 * Main entry point for running a quality benchmark.
32 * There are two main configurations for running a quality benchmark: <ul>
33 * <li>Against existing judgements.</li>
34 * <li>For submission (e.g. for a contest).</li>
36 * The first configuration requires a non null
37 * {@link org.apache.lucene.benchmark.quality.Judge Judge}.
38 * The second configuration requires a non null
39 * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}.
41 public class QualityBenchmark {
43 /** Quality Queries that this quality benchmark would execute. */
44 protected QualityQuery qualityQueries[];
46 /** Parser for turning QualityQueries into Lucene Queries. */
47 protected QualityQueryParser qqParser;
49 /** Index to be searched. */
50 protected Searcher searcher;
52 /** index field to extract doc name for each search result; used for judging the results. */
53 protected String docNameField;
55 /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
56 private int maxQueries = Integer.MAX_VALUE;
58 /** maximal number of results to collect for each query. Default: 1000. */
59 private int maxResults = 1000;
62 * Create a QualityBenchmark.
63 * @param qqs quality queries to run.
64 * @param qqParser parser for turning QualityQueries into Lucene Queries.
65 * @param searcher index to be searched.
66 * @param docNameField name of field containing the document name.
67 * This allows to extract the doc name for search results,
68 * and is important for judging the results.
70 public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser,
71 Searcher searcher, String docNameField) {
72 this.qualityQueries = qqs;
73 this.qqParser = qqParser;
74 this.searcher = searcher;
75 this.docNameField = docNameField;
79 * Run the quality benchmark.
80 * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
81 * If null, no judgements would be made. Usually null for a submission run.
82 * @param submitRep submission report is created if non null.
83 * @param qualityLog If not null, quality run data would be printed for each query.
84 * @return QualityStats of each quality query that was executed.
85 * @throws Exception if quality benchmark failed to run.
87 public QualityStats [] execute(Judge judge, SubmissionReport submitRep,
88 PrintWriter qualityLog) throws Exception {
89 int nQueries = Math.min(maxQueries, qualityQueries.length);
90 QualityStats stats[] = new QualityStats[nQueries];
91 for (int i=0; i<nQueries; i++) {
92 QualityQuery qq = qualityQueries[i];
94 Query q = qqParser.parse(qq);
95 // search with this query
96 long t1 = System.currentTimeMillis();
97 TopDocs td = searcher.search(q,null,maxResults);
98 long searchTime = System.currentTimeMillis()-t1;
99 //most likely we either submit or judge, but check both
101 stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
103 if (submitRep!=null) {
104 submitRep.report(qq,td,docNameField,searcher);
107 if (submitRep!=null) {
113 /* Analyze/judge results for a single quality query; optionally log them. */
114 private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
115 QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
116 ScoreDoc sd[] = td.scoreDocs;
117 long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case.
118 DocNameExtractor xt = new DocNameExtractor(docNameField);
119 for (int i=0; i<sd.length; i++) {
120 String docName = xt.docName(searcher,sd[i].doc);
121 long docNameExtractTime = System.currentTimeMillis() - t1;
122 t1 = System.currentTimeMillis();
123 boolean isRelevant = judge.isRelevant(docName,qq);
124 stts.addResult(i+1,isRelevant, docNameExtractTime);
127 logger.println(qq.getQueryID()+" - "+q);
128 stts.log(qq.getQueryID()+" Stats:",1,logger," ");
134 * @return the maximum number of quality queries to run. Useful at debugging.
136 public int getMaxQueries() {
141 * Set the maximum number of quality queries to run. Useful at debugging.
143 public void setMaxQueries(int maxQueries) {
144 this.maxQueries = maxQueries;
148 * @return the maximum number of results to collect for each quality query.
150 public int getMaxResults() {
155 * set the maximum number of results to collect for each quality query.
157 public void setMaxResults(int maxResults) {
158 this.maxResults = maxResults;