1 package org.apache.lucene.xmlparser;
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
8 import org.apache.lucene.analysis.Analyzer;
9 import org.apache.lucene.analysis.standard.StandardAnalyzer;
10 import org.apache.lucene.document.Field;
11 import org.apache.lucene.document.NumericField;
12 import org.apache.lucene.index.IndexReader;
13 import org.apache.lucene.index.IndexWriter;
14 import org.apache.lucene.search.IndexSearcher;
15 import org.apache.lucene.search.Query;
16 import org.apache.lucene.search.ScoreDoc;
17 import org.apache.lucene.search.TopDocs;
18 import org.apache.lucene.store.Directory;
19 import org.apache.lucene.util.Version;
20 import org.apache.lucene.util.LuceneTestCase;
21 import org.junit.AfterClass;
22 import org.junit.Assume;
23 import org.junit.BeforeClass;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Tests that parse XML query definitions loaded from the classpath and run
 * them against a shared, statically held index.
 */
41 public class TestParser extends LuceneTestCase {
// Parser under test; assigned a CorePlusExtensionsParser in beforeClass().
42 private static CoreParser builder;
// Directory the test index is written to and read from.
43 private static Directory dir;
// Reader opened on dir in beforeClass().
44 private static IndexReader reader;
// Searcher created from reader; used by every test to execute parsed queries.
45 private static IndexSearcher searcher;
// Class-level fixture setup: creates the parser under test, writes documents
// built from the reuters21578.txt classpath resource into dir, then opens the
// shared reader and searcher on that directory.
// NOTE(review): this excerpt has gaps. The initialization of dir, whatever
// loop advances line, and the closing of d and writer are not visible here;
// confirm against the full file.
48 public static void beforeClass() throws Exception {
49 // TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
50 Analyzer analyzer=new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_24);
51 //initialize the parser
// "contents" is the field name handed to the parser; the same analyzer is used for indexing below.
52 builder=new CorePlusExtensionsParser("contents",analyzer);
// Source data is read as text from a resource located relative to TestParser.
54 BufferedReader d = new BufferedReader(new InputStreamReader(TestParser.class.getResourceAsStream("reuters21578.txt")));
56 IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(Version.LUCENE_24, analyzer));
57 String line = d.readLine();
// A line is split at its first tab: the text before it is the date, the rest is the content.
60 int endOfDate=line.indexOf('\t');
61 String date=line.substring(0,endOfDate).trim();
62 String content=line.substring(endOfDate).trim();
63 org.apache.lucene.document.Document doc =new org.apache.lucene.document.Document();
64 doc.add(newField("date",date,Field.Store.YES,Field.Index.ANALYZED));
65 doc.add(newField("contents",content,Field.Store.YES,Field.Index.ANALYZED));
// The date is additionally indexed as an int in the numeric field "date2".
66 NumericField numericField = new NumericField("date2");
67 numericField.setIntValue(Integer.valueOf(date));
68 doc.add(numericField);
69 writer.addDocument(doc);
// Open the shared reader/searcher that all tests use.
74 reader=IndexReader.open(dir, true);
75 searcher=newSearcher(reader);
// Class-level teardown hook.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases the static fixtures created in beforeClass() — confirm.
83 public static void afterClass() throws Exception {
93 public void testSimpleXML() throws ParserException, IOException
95 Query q=parse("TermQuery.xml");
96 dumpResults("TermQuery", q, 5);
98 public void testSimpleTermsQueryXML() throws ParserException, IOException
100 Query q=parse("TermsQuery.xml");
101 dumpResults("TermsQuery", q, 5);
103 public void testBooleanQueryXML() throws ParserException, IOException
105 Query q=parse("BooleanQuery.xml");
106 dumpResults("BooleanQuery", q, 5);
108 public void testRangeFilterQueryXML() throws ParserException, IOException
110 Query q=parse("RangeFilterQuery.xml");
111 dumpResults("RangeFilter", q, 5);
113 public void testUserQueryXML() throws ParserException, IOException
115 Query q=parse("UserInputQuery.xml");
116 dumpResults("UserInput with Filter", q, 5);
119 public void testCustomFieldUserQueryXML() throws ParserException, IOException
121 Query q=parse("UserInputQueryCustomField.xml");
122 assertEquals(20.0f, q.getBoost());
123 int h = searcher.search(q, null, 1000).totalHits;
124 assertEquals("UserInputQueryCustomField should produce 0 result ", 0,h);
127 public void testLikeThisQueryXML() throws Exception
129 Query q=parse("LikeThisQuery.xml");
130 dumpResults("like this", q, 5);
132 public void testBoostingQueryXML() throws Exception
134 Query q=parse("BoostingQuery.xml");
135 dumpResults("boosting ",q, 5);
// Parses FuzzyLikeThisQuery.xml, prints the query as rewritten against the
// shared reader, and then checks via dumpResults that it produces hits.
// NOTE(review): lines appear to be missing around the println in this excerpt
// (possibly a guard controlling whether it runs) — confirm against the full file.
137 public void testFuzzyLikeThisQueryXML() throws Exception
139 Query q=parse("FuzzyLikeThisQuery.xml");
140 //show rewritten fuzzyLikeThisQuery - see what is being matched on
143 System.out.println(q.rewrite(reader));
145 dumpResults("FuzzyLikeThis", q, 5);
147 public void testTermsFilterXML() throws Exception
149 Query q=parse("TermsFilterQuery.xml");
150 dumpResults("Terms Filter",q, 5);
152 public void testBoostingTermQueryXML() throws Exception
154 Query q=parse("BoostingTermQuery.xml");
155 dumpResults("BoostingTermQuery",q, 5);
157 public void testSpanTermXML() throws Exception
159 Query q=parse("SpanQuery.xml");
160 dumpResults("Span Query",q, 5);
162 public void testConstantScoreQueryXML() throws Exception
164 Query q=parse("ConstantScoreQuery.xml");
165 dumpResults("ConstantScoreQuery",q, 5);
167 public void testMatchAllDocsPlusFilterXML() throws ParserException, IOException
169 Query q=parse("MatchAllDocsQuery.xml");
170 dumpResults("MatchAllDocsQuery with range filter", q, 5);
172 public void testBooleanFilterXML() throws ParserException, IOException
174 Query q=parse("BooleanFilter.xml");
175 dumpResults("Boolean filter", q, 5);
177 public void testNestedBooleanQuery() throws ParserException, IOException
179 Query q=parse("NestedBooleanQuery.xml");
180 dumpResults("Nested Boolean query", q, 5);
182 public void testCachedFilterXML() throws ParserException, IOException
184 Query q=parse("CachedFilter.xml");
185 dumpResults("Cached filter", q, 5);
187 public void testDuplicateFilterQueryXML() throws ParserException, IOException
189 Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders() == null ||
190 searcher.getIndexReader().getSequentialSubReaders().length == 1);
191 Query q=parse("DuplicateFilterQuery.xml");
192 int h = searcher.search(q, null, 1000).totalHits;
193 assertEquals("DuplicateFilterQuery should produce 1 result ", 1,h);
196 public void testNumericRangeFilterQueryXML() throws ParserException, IOException
198 Query q=parse("NumericRangeFilterQuery.xml");
199 dumpResults("NumericRangeFilter", q, 5);
202 public void testNumericRangeQueryQueryXML() throws ParserException, IOException
204 Query q=parse("NumericRangeQueryQuery.xml");
205 dumpResults("NumericRangeQuery", q, 5);
210 //================= Helper methods ===================================
// Loads the named XML query definition as a classpath resource (resolved
// relative to TestParser) and passes the stream to the shared builder.
// NOTE(review): the return statement and any closing of xmlStream are not
// visible in this excerpt — confirm against the full file.
211 private Query parse(String xmlFileName) throws ParserException, IOException
213 InputStream xmlStream=TestParser.class.getResourceAsStream(xmlFileName);
214 Query result=builder.parse(xmlStream);
// Runs q on the shared searcher requesting up to numDocs hits, asserts that at
// least one document matched (qType labels the failure message), and prints
// the "date" and "contents" fields of each returned hit under a qType banner.
// NOTE(review): lines appear to be missing from this excerpt (for example any
// guard around the printing) — confirm against the full file.
218 private void dumpResults(String qType,Query q, int numDocs) throws IOException
220 TopDocs hits = searcher.search(q, null, numDocs);
221 assertTrue(qType +" should produce results ", hits.totalHits>0);
224 System.out.println("========="+qType+"============");
225 ScoreDoc[] scoreDocs = hits.scoreDocs;
// Bounded by the smaller of the requested count and the total number of matches.
226 for(int i=0;i<Math.min(numDocs,hits.totalHits);i++)
228 org.apache.lucene.document.Document ldoc=searcher.doc(scoreDocs[i].doc);
229 System.out.println("["+ldoc.get("date")+"]"+ldoc.get("contents"));
231 System.out.println();