pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / benchmark / src / java / org / apache / lucene / benchmark / quality / QualityBenchmark.java
diff --git a/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java b/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java

new file mode 100644 (file)

index 0000000..0e91b6f
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TopDocs;
+
+/**
+ * Main entry point for running a quality benchmark.
+ * <p>
+ * There are two main configurations for running a quality benchmark: <ul>
+ * <li>Against existing judgements.</li>
+ * <li>For submission (e.g. for a contest).</li>
+ * </ul>
+ * The first configuration requires a non null
+ * {@link org.apache.lucene.benchmark.quality.Judge Judge}. 
+ * The second configuration requires a non null 
+ * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}.
+ */
+public class QualityBenchmark {
+
+  /** Quality Queries that this quality benchmark would execute. */
+  protected QualityQuery qualityQueries[];
+  
+  /** Parser for turning QualityQueries into Lucene Queries. */
+  protected QualityQueryParser qqParser;
+  
+  /** Index to be searched. */
+  protected IndexSearcher searcher;
+
+  /** index field to extract doc name for each search result; used for judging the results. */  
+  protected String docNameField;
+  
+  /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
+  private int maxQueries = Integer.MAX_VALUE;
+  
+  /** maximal number of results to collect for each query. Default: 1000. */
+  private int maxResults = 1000;
+
+  /**
+   * Create a QualityBenchmark.
+   * @param qqs quality queries to run.
+   * @param qqParser parser for turning QualityQueries into Lucene Queries. 
+   * @param searcher index to be searched.
+   * @param docNameField name of field containing the document name.
+   *        This allows to extract the doc name for search results,
+   *        and is important for judging the results.  
+   */
+  public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser, 
+      IndexSearcher searcher, String docNameField) {
+    this.qualityQueries = qqs;
+    this.qqParser = qqParser;
+    this.searcher = searcher;
+    this.docNameField = docNameField;
+  }
+
+  /**
+   * Run the quality benchmark.
+   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
+   *        If null, no judgements would be made. Usually null for a submission run. 
+   * @param submitRep submission report is created if non null.
+   * @param qualityLog If not null, quality run data would be printed for each query.
+   * @return QualityStats of each quality query that was executed.
+   * @throws Exception if quality benchmark failed to run.
+   */
+  public  QualityStats [] execute(Judge judge, SubmissionReport submitRep, 
+                                  PrintWriter qualityLog) throws Exception {
+    int nQueries = Math.min(maxQueries, qualityQueries.length);
+    QualityStats stats[] = new QualityStats[nQueries]; 
+    for (int i=0; i<nQueries; i++) {
+      QualityQuery qq = qualityQueries[i];
+      // generate query
+      Query q = qqParser.parse(qq);
+      // search with this query 
+      long t1 = System.currentTimeMillis();
+      TopDocs td = searcher.search(q,null,maxResults);
+      long searchTime = System.currentTimeMillis()-t1;
+      //most likely we either submit or judge, but check both 
+      if (judge!=null) {
+        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
+      }
+      if (submitRep!=null) {
+        submitRep.report(qq,td,docNameField,searcher);
+      }
+    } 
+    if (submitRep!=null) {
+      submitRep.flush();
+    }
+    return stats;
+  }
+  
+  /* Analyze/judge results for a single quality query; optionally log them. */  
+  private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
+    QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
+    ScoreDoc sd[] = td.scoreDocs;
+    long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case.
+    DocNameExtractor xt = new DocNameExtractor(docNameField);
+    for (int i=0; i<sd.length; i++) {
+      String docName = xt.docName(searcher,sd[i].doc);
+      long docNameExtractTime = System.currentTimeMillis() - t1;
+      t1 = System.currentTimeMillis();
+      boolean isRelevant = judge.isRelevant(docName,qq);
+      stts.addResult(i+1,isRelevant, docNameExtractTime);
+    }
+    if (logger!=null) {
+      logger.println(qq.getQueryID()+"  -  "+q);
+      stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
+    }
+    return stts;
+  }
+
+  /**
+   * @return the maximum number of quality queries to run. Useful at debugging.
+   */
+  public int getMaxQueries() {
+    return maxQueries;
+  }
+
+  /**
+   * Set the maximum number of quality queries to run. Useful at debugging.
+   */
+  public void setMaxQueries(int maxQueries) {
+    this.maxQueries = maxQueries;
+  }
+
+  /**
+   * @return the maximum number of results to collect for each quality query.
+   */
+  public int getMaxResults() {
+    return maxResults;
+  }
+
+  /**
+   * set the maximum number of results to collect for each quality query.
+   */
+  public void setMaxResults(int maxResults) {
+    this.maxResults = maxResults;
+  }
+
+}