1 package org.apache.lucene.benchmark.quality.trec;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.benchmark.quality.trec.TrecJudge;
21 import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
22 import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
23 import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
24 import org.apache.lucene.benchmark.quality.*;
25 import org.apache.lucene.search.IndexSearcher;
26 import org.apache.lucene.search.Searcher;
27 import org.apache.lucene.store.FSDirectory;
29 import java.io.BufferedReader;
31 import java.io.FileReader;
32 import java.io.PrintWriter;
33 import java.util.HashSet;
41 public class QueryDriver {
42 public static void main(String[] args) throws Exception {
43 if (args.length < 4 || args.length > 5) {
44 System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
45 System.err.println("topicsFile: input file containing queries");
46 System.err.println("qrelsFile: input file containing relevance judgements");
47 System.err.println("submissionFile: output submission file for trec_eval");
48 System.err.println("indexDir: index directory");
49 System.err.println("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
50 System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
54 File topicsFile = new File(args[0]);
55 File qrelsFile = new File(args[1]);
56 SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
57 FSDirectory dir = FSDirectory.open(new File(args[3]));
58 String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
59 Searcher searcher = new IndexSearcher(dir, true);
61 int maxResults = 1000;
62 String docNameField = "docname";
64 PrintWriter logger = new PrintWriter(System.out, true);
66 // use trec utilities to read trec topics into quality queries
67 TrecTopicsReader qReader = new TrecTopicsReader();
68 QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
70 // prepare judge, with trec utilities that read from a QRels file
71 Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
73 // validate topics & judgments match each other
74 judge.validateData(qqs, logger);
76 Set<String> fieldSet = new HashSet<String>();
77 if (fieldSpec.indexOf('T') >= 0) fieldSet.add("title");
78 if (fieldSpec.indexOf('D') >= 0) fieldSet.add("description");
79 if (fieldSpec.indexOf('N') >= 0) fieldSet.add("narrative");
81 // set the parsing of quality queries into Lucene queries.
82 QualityQueryParser qqParser = new SimpleQQParser(fieldSet.toArray(new String[0]), "body");
85 QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
86 qrun.setMaxResults(maxResults);
87 QualityStats stats[] = qrun.execute(judge, submitLog, logger);
89 // print an average sum of the results
90 QualityStats avg = QualityStats.average(stats);
91 avg.log("SUMMARY", 2, logger, " ");