1 package org.apache.lucene.benchmark.byTask.feeds;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.analysis.Analyzer;
21 import org.apache.lucene.index.Term;
22 import org.apache.lucene.queryParser.QueryParser;
23 import org.apache.lucene.search.Query;
24 import org.apache.lucene.search.WildcardQuery;
25 import org.apache.lucene.search.spans.SpanFirstQuery;
26 import org.apache.lucene.search.spans.SpanNearQuery;
27 import org.apache.lucene.search.spans.SpanQuery;
28 import org.apache.lucene.search.spans.SpanTermQuery;
29 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
30 import org.apache.lucene.util.Version;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.List;
38 * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
39 * searching in the Reuters collection.
41 public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker {
43 private static String [] STANDARD_QUERIES = {
44 //Start with some short queries
45 "Salomon", "Comex", "night trading", "Japan Sony",
46 //Try some Phrase Queries
47 "\"Sony Japan\"", "\"food needs\"~3",
48 "\"World Bank\"^2 AND Nigeria", "\"World Bank\" -Nigeria",
50 //Try some longer queries
51 "airline Europe Canada destination",
52 "Long term pressure by trade " +
53 "ministers is necessary if the current Uruguay round of talks on " +
54 "the General Agreement on Trade and Tariffs (GATT) is to " +
58 private static Query[] getPrebuiltQueries(String field) {
59 // be wary of unanalyzed text
61 new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),
62 new SpanNearQuery(new SpanQuery[]{new SpanTermQuery(new Term(field, "night")), new SpanTermQuery(new Term(field, "trading"))}, 4, false),
63 new SpanNearQuery(new SpanQuery[]{new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 10), new SpanTermQuery(new Term(field, "credit"))}, 10, false),
64 new WildcardQuery(new Term(field, "fo*")),
69 * Parse the strings containing Lucene queries.
71 * @param qs array of strings containing query expressions
72 * @param a analyzer to use when parsing queries
73 * @return array of Lucene queries
75 private static Query[] createQueries(List<Object> qs, Analyzer a) {
76 QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
77 List<Object> queries = new ArrayList<Object>();
78 for (int i = 0; i < qs.size(); i++) {
81 Object query = qs.get(i);
83 if (query instanceof String) {
84 q = qp.parse((String) query);
86 } else if (query instanceof Query) {
90 System.err.println("Unsupported Query Type: " + query);
97 } catch (Exception e) {
102 return queries.toArray(new Query[0]);
106 protected Query[] prepareQueries() throws Exception {
107 // analyzer (default is standard analyzer)
108 Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
109 "org.apache.lucene.analysis.standard.StandardAnalyzer"));
111 List<Object> queryList = new ArrayList<Object>(20);
112 queryList.addAll(Arrays.asList(STANDARD_QUERIES));
113 queryList.addAll(Arrays.asList(getPrebuiltQueries(DocMaker.BODY_FIELD)));
114 return createQueries(queryList, anlzr);