lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java

   1 /**
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17 package org.apache.lucene.benchmark.quality;
  18
  19 import java.io.IOException;
  20 import java.io.PrintWriter;
  21
  22 import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
  23 import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
  24 import org.apache.lucene.search.Query;
  25 import org.apache.lucene.search.ScoreDoc;
  26 import org.apache.lucene.search.IndexSearcher;
  27 import org.apache.lucene.search.TopDocs;
  28
  29 /**
  30  * Main entry point for running a quality benchmark.
  31  * <p>
  32  * There are two main configurations for running a quality benchmark: <ul>
  33  * <li>Against existing judgements.</li>
  34  * <li>For submission (e.g. for a contest).</li>
  35  * </ul>
  36  * The first configuration requires a non null
  37  * {@link org.apache.lucene.benchmark.quality.Judge Judge}.
  38  * The second configuration requires a non null
  39  * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}.
  40  */
  41 public class QualityBenchmark {
  42
  43   /** Quality Queries that this quality benchmark would execute. */
  44   protected QualityQuery qualityQueries[];
  45
  46   /** Parser for turning QualityQueries into Lucene Queries. */
  47   protected QualityQueryParser qqParser;
  48
  49   /** Index to be searched. */
  50   protected IndexSearcher searcher;
  51
  52   /** index field to extract doc name for each search result; used for judging the results. */
  53   protected String docNameField;
  54
  55   /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
  56   private int maxQueries = Integer.MAX_VALUE;
  57
  58   /** maximal number of results to collect for each query. Default: 1000. */
  59   private int maxResults = 1000;
  60
  61   /**
  62    * Create a QualityBenchmark.
  63    * @param qqs quality queries to run.
  64    * @param qqParser parser for turning QualityQueries into Lucene Queries.
  65    * @param searcher index to be searched.
  66    * @param docNameField name of field containing the document name.
  67    *        This allows to extract the doc name for search results,
  68    *        and is important for judging the results.
  69    */
  70   public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser,
  71       IndexSearcher searcher, String docNameField) {
  72     this.qualityQueries = qqs;
  73     this.qqParser = qqParser;
  74     this.searcher = searcher;
  75     this.docNameField = docNameField;
  76   }
  77
  78   /**
  79    * Run the quality benchmark.
  80    * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
  81    *        If null, no judgements would be made. Usually null for a submission run.
  82    * @param submitRep submission report is created if non null.
  83    * @param qualityLog If not null, quality run data would be printed for each query.
  84    * @return QualityStats of each quality query that was executed.
  85    * @throws Exception if quality benchmark failed to run.
  86    */
  87   public  QualityStats [] execute(Judge judge, SubmissionReport submitRep,
  88                                   PrintWriter qualityLog) throws Exception {
  89     int nQueries = Math.min(maxQueries, qualityQueries.length);
  90     QualityStats stats[] = new QualityStats[nQueries];
  91     for (int i=0; i<nQueries; i++) {
  92       QualityQuery qq = qualityQueries[i];
  93       // generate query
  94       Query q = qqParser.parse(qq);
  95       // search with this query
  96       long t1 = System.currentTimeMillis();
  97       TopDocs td = searcher.search(q,null,maxResults);
  98       long searchTime = System.currentTimeMillis()-t1;
  99       //most likely we either submit or judge, but check both
 100       if (judge!=null) {
 101         stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
 102       }
 103       if (submitRep!=null) {
 104         submitRep.report(qq,td,docNameField,searcher);
 105       }
 106     }
 107     if (submitRep!=null) {
 108       submitRep.flush();
 109     }
 110     return stats;
 111   }
 112
 113   /* Analyze/judge results for a single quality query; optionally log them. */
 114   private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
 115     QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
 116     ScoreDoc sd[] = td.scoreDocs;
 117     long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case.
 118     DocNameExtractor xt = new DocNameExtractor(docNameField);
 119     for (int i=0; i<sd.length; i++) {
 120       String docName = xt.docName(searcher,sd[i].doc);
 121       long docNameExtractTime = System.currentTimeMillis() - t1;
 122       t1 = System.currentTimeMillis();
 123       boolean isRelevant = judge.isRelevant(docName,qq);
 124       stts.addResult(i+1,isRelevant, docNameExtractTime);
 125     }
 126     if (logger!=null) {
 127       logger.println(qq.getQueryID()+"  -  "+q);
 128       stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
 129     }
 130     return stts;
 131   }
 132
 133   /**
 134    * @return the maximum number of quality queries to run. Useful at debugging.
 135    */
 136   public int getMaxQueries() {
 137     return maxQueries;
 138   }
 139
 140   /**
 141    * Set the maximum number of quality queries to run. Useful at debugging.
 142    */
 143   public void setMaxQueries(int maxQueries) {
 144     this.maxQueries = maxQueries;
 145   }
 146
 147   /**
 148    * @return the maximum number of results to collect for each quality query.
 149    */
 150   public int getMaxResults() {
 151     return maxResults;
 152   }
 153
 154   /**
 155    * set the maximum number of results to collect for each quality query.
 156    */
 157   public void setMaxResults(int maxResults) {
 158     this.maxResults = maxResults;
 159   }
 160
 161 }