--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.quality.trec;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.NoSuchElementException;
+import java.util.StringTokenizer;
+
+import org.apache.lucene.benchmark.quality.Judge;
+import org.apache.lucene.benchmark.quality.QualityQuery;
+
+/**
+ * Judge if given document is relevant to given quality query, based on Trec format for judgements.
+ * <p>
+ * Only positive (relevant) judgements are retained in memory; a query all of
+ * whose judgement lines are non-relevant is therefore treated the same as a
+ * query with no judgements at all.
+ */
+public class TrecJudge implements Judge {
+
+  /** Relevant-document judgements, keyed by query ID. */
+  HashMap<String,QRelJudgement> judgements;
+
+  /**
+   * Constructor from a reader.
+   * <p>
+   * Expected input format:
+   * <pre>
+   * qnum 0 doc-name is-relevant
+   * </pre>
+   * Two sample lines:
+   * <pre>
+   * 19 0 doc303 1
+   * 19 0 doc7295 0
+   * </pre>
+   * Blank lines and lines starting with <code>#</code> are skipped. The second
+   * field is read but ignored, and any value other than <code>0</code> in the
+   * fourth field marks the document as relevant.
+   * <p>
+   * The reader is always closed before this constructor returns, whether it
+   * completes normally or throws.
+   *
+   * @param reader where judgments are read from.
+   * @throws IOException if reading from, or closing, the reader fails.
+   * @throws NoSuchElementException if a non-comment line has fewer than four fields.
+   */
+  public TrecJudge (BufferedReader reader) throws IOException {
+    judgements = new HashMap<String,QRelJudgement>();
+    QRelJudgement curr = null;
+    String zero = "0";
+    String line;
+
+    try {
+      while (null!=(line=reader.readLine())) {
+        line = line.trim();
+        if (line.length()==0 || '#'==line.charAt(0)) {
+          continue; // blank line or comment
+        }
+        StringTokenizer st = new StringTokenizer(line);
+        // Fail with the offending line in the message rather than with the bare
+        // NoSuchElementException that nextToken() would raise on a short line.
+        if (st.countTokens()<4) {
+          throw new NoSuchElementException("wrong format (expected: qnum 0 doc-name is-relevant): "+line);
+        }
+        String queryID = st.nextToken();
+        st.nextToken(); // second field is not used
+        String docName = st.nextToken();
+        boolean relevant = !zero.equals(st.nextToken());
+        assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken();
+        if (relevant) { // only keep relevant docs
+          // Consecutive lines usually share a query ID, so reuse the previous
+          // entry and only go back to the map when the ID changes.
+          if (curr==null || !curr.queryID.equals(queryID)) {
+            curr = judgements.get(queryID);
+            if (curr==null) {
+              curr = new QRelJudgement(queryID);
+              judgements.put(queryID,curr);
+            }
+          }
+          curr.addRelevantDoc(docName);
+        }
+      }
+    } finally {
+      reader.close();
+    }
+  }
+
+  // inherit javadocs
+  public boolean isRelevant(String docName, QualityQuery query) {
+    QRelJudgement qrj = judgements.get(query.getQueryID());
+    return qrj!=null && qrj.isRelevant(docName);
+  }
+
+  /** single Judgement of a trec quality query: the set of docs judged relevant to it */
+  private static class QRelJudgement {
+    final String queryID;
+    private final HashSet<String> relevantDocs;
+
+    QRelJudgement(String queryID) {
+      this.queryID = queryID;
+      relevantDocs = new HashSet<String>();
+    }
+
+    /** Records docName as relevant; adding the same name twice has no further effect. */
+    public void addRelevantDoc(String docName) {
+      relevantDocs.add(docName);
+    }
+
+    /** Returns true if docName was recorded as relevant for this query. */
+    boolean isRelevant(String docName) {
+      return relevantDocs.contains(docName);
+    }
+
+    /** Returns the number of distinct relevant docs recorded for this query. */
+    public int maxRecall() {
+      return relevantDocs.size();
+    }
+  }
+
+  // inherit javadocs
+  public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
+    // Working copy of the judgements: entries are removed as their query is
+    // seen, so whatever remains at the end matched no query.
+    HashMap<String,QRelJudgement> missingQueries = new HashMap<String, QRelJudgement>(judgements);
+    ArrayList<String> missingJudgements = new ArrayList<String>();
+    for (final QualityQuery query : qq) {
+      String id = query.getQueryID();
+      // Test against the full judgements map, not the shrinking copy, so a
+      // query ID that appears more than once in qq is not misreported as
+      // having no judgments on its second occurrence.
+      if (judgements.containsKey(id)) {
+        missingQueries.remove(id);
+      } else {
+        missingJudgements.add(id);
+      }
+    }
+    boolean isValid = true;
+    if (missingJudgements.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! - ");
+        for (final String id : missingJudgements) {
+          logger.println(" "+ id);
+        }
+      }
+    }
+    if (missingQueries.size()>0) {
+      isValid = false;
+      if (logger!=null) {
+        logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - ");
+        for (final String id : missingQueries.keySet()) {
+          logger.println(" "+id);
+        }
+      }
+    }
+    return isValid;
+  }
+
+  // inherit javadocs
+  public int maxRecall(QualityQuery query) {
+    QRelJudgement qrj = judgements.get(query.getQueryID());
+    if (qrj!=null) {
+      return qrj.maxRecall();
+    }
+    return 0;
+  }
+}