/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.apache.lucene.benchmark.quality.trec;
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.StringTokenizer;
26 import org.apache.lucene.benchmark.quality.Judge;
27 import org.apache.lucene.benchmark.quality.QualityQuery;
30 * Judge if given document is relevant to given quality query, based on Trec format for judgements.
32 public class TrecJudge implements Judge {
34 HashMap<String,QRelJudgement> judgements;
37 * Constructor from a reader.
39 * Expected input format:
41 * qnum 0 doc-name is-relevant
48 * @param reader where judgments are read from.
51 public TrecJudge (BufferedReader reader) throws IOException {
52 judgements = new HashMap<String,QRelJudgement>();
53 QRelJudgement curr = null;
58 while (null!=(line=reader.readLine())) {
60 if (line.length()==0 || '#'==line.charAt(0)) {
63 StringTokenizer st = new StringTokenizer(line);
64 String queryID = st.nextToken();
66 String docName = st.nextToken();
67 boolean relevant = !zero.equals(st.nextToken());
68 assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken();
69 if (relevant) { // only keep relevant docs
70 if (curr==null || !curr.queryID.equals(queryID)) {
71 curr = judgements.get(queryID);
73 curr = new QRelJudgement(queryID);
74 judgements.put(queryID,curr);
77 curr.addRelevandDoc(docName);
86 public boolean isRelevant(String docName, QualityQuery query) {
87 QRelJudgement qrj = judgements.get(query.getQueryID());
88 return qrj!=null && qrj.isRelevant(docName);
91 /** single Judgement of a trec quality query */
92 private static class QRelJudgement {
93 private String queryID;
94 private HashMap<String,String> relevantDocs;
96 QRelJudgement(String queryID) {
97 this.queryID = queryID;
98 relevantDocs = new HashMap<String,String>();
101 public void addRelevandDoc(String docName) {
102 relevantDocs.put(docName,docName);
105 boolean isRelevant(String docName) {
106 return relevantDocs.containsKey(docName);
109 public int maxRecall() {
110 return relevantDocs.size();
115 public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
116 HashMap<String,QRelJudgement> missingQueries = new HashMap<String, QRelJudgement>(judgements);
117 ArrayList<String> missingJudgements = new ArrayList<String>();
118 for (int i=0; i<qq.length; i++) {
119 String id = qq[i].getQueryID();
120 if (missingQueries.containsKey(id)) {
121 missingQueries.remove(id);
123 missingJudgements.add(id);
126 boolean isValid = true;
127 if (missingJudgements.size()>0) {
130 logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - ");
131 for (int i=0; i<missingJudgements.size(); i++) {
132 logger.println(" "+ missingJudgements.get(i));
136 if (missingQueries.size()>0) {
139 logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - ");
140 for (final String id : missingQueries.keySet()) {
141 logger.println(" "+id);
149 public int maxRecall(QualityQuery query) {
150 QRelJudgement qrj = judgements.get(query.getQueryID());
152 return qrj.maxRecall();