1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.store.Directory;
21 import org.apache.lucene.util.LuceneTestCase;
22 import org.apache.lucene.analysis.MockAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.document.Field.Store;
26 import org.apache.lucene.document.Field.Index;
27 import org.apache.lucene.index.IndexReader;
28 import org.apache.lucene.index.RandomIndexWriter;
29 import org.apache.lucene.index.Term;
30 import org.apache.lucene.queryParser.QueryParser;
32 import java.io.IOException;
35 * TestWildcard tests the '*' and '?' wildcard characters.
37 public class TestWildcard
38 extends LuceneTestCase {
// Per-test setup hook, declared to throw Exception. Its body is not visible in this excerpt.
// NOTE(review): presumably overrides the LuceneTestCase setup and delegates to super — confirm.
41 public void setUp() throws Exception {
// Checks equals() on WildcardQuery: three queries built from the same term ("field", "b*a")
// must compare equal to one another, and must not equal null or a FuzzyQuery on that term.
45 public void testEquals() {
46 WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
47 WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
48 WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));
// equality must hold in both directions between wq1 and wq2
51 assertEquals(wq1, wq2);
52 assertEquals(wq2, wq1);
// wq1 == wq2 and wq2 == wq3 must also give wq1 == wq3
55 assertEquals(wq2, wq3);
56 assertEquals(wq1, wq3);
// comparing against null must return false rather than throw
58 assertFalse(wq1.equals(null));
// a different query type over the identical term must not be equal, in either direction
60 FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
61 assertFalse(wq1.equals(fq));
62 assertFalse(fq.equals(wq1));
66 * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single
67 * TermQuery. The boost should be preserved, and the rewrite should return
68 * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod.
// Rewrites a WildcardQuery whose term ("nowildcard") contains no wildcard character under
// four different rewrite methods, and checks the class and the boost of each rewritten query.
70 public void testTermWithoutWildcard() throws IOException {
71 Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
72 IndexReader reader = IndexReader.open(indexStore);
73 IndexSearcher searcher = new IndexSearcher(reader);
// of the two indexed values only one is expected to match the wildcard-free term
75 MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
76 assertMatches(searcher, wq, 1);
// scoring boolean rewrite: the result is expected to be a plain TermQuery
78 wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
80 Query q = searcher.rewrite(wq);
81 assertTrue(q instanceof TermQuery);
// NOTE(review): this boost comparison is exact (delta 0) while the three below allow 0.1.
// NOTE(review): the rewritten query's boost is passed as the first (conventionally "expected") argument.
82 assertEquals(q.getBoost(), wq.getBoost(), 0);
// the three constant-score rewrite methods are each expected to yield a ConstantScoreQuery
84 wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
86 q = searcher.rewrite(wq);
87 assertTrue(q instanceof ConstantScoreQuery);
88 assertEquals(q.getBoost(), wq.getBoost(), 0.1);
90 wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
92 q = searcher.rewrite(wq);
93 assertTrue(q instanceof ConstantScoreQuery);
94 assertEquals(q.getBoost(), wq.getBoost(), 0.1);
96 wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
98 q = searcher.rewrite(wq);
99 assertTrue(q instanceof ConstantScoreQuery);
100 assertEquals(q.getBoost(), wq.getBoost(), 0.1);
107 * Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery
// Checks that a WildcardQuery over an empty term text matches no documents and, with the
// scoring boolean rewrite method, rewrites to a BooleanQuery that has no clauses.
109 public void testEmptyTerm() throws IOException {
110 Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"});
111 IndexReader reader = IndexReader.open(indexStore);
112 IndexSearcher searcher = new IndexSearcher(reader);
// empty term text: neither indexed value should match
114 MultiTermQuery wq = new WildcardQuery(new Term("field", ""));
115 wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
116 assertMatches(searcher, wq, 0);
// the rewritten form must be a BooleanQuery with zero clauses
117 Query q = searcher.rewrite(wq);
118 assertTrue(q instanceof BooleanQuery);
119 assertEquals(0, ((BooleanQuery) q).clauses().size());
126 * Tests if a WildcardQuery that has only a trailing * in the term is
127 * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
// Checks a WildcardQuery whose only wildcard is a trailing '*': it must match both indexed
// values, and the term enumeration it produces must be a PrefixTermEnum.
130 public void testPrefixTerm() throws IOException {
131 Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
132 IndexReader reader = IndexReader.open(indexStore);
133 IndexSearcher searcher = new IndexSearcher(reader);
// "prefix*" is expected to match both "prefix" and "prefixx"
135 MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
136 assertMatches(searcher, wq, 2);
// inspects the enum type directly rather than the rewritten query
137 assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum);
145 * Tests Wildcard queries with an asterisk.
// Runs a set of '*' wildcard queries against an index holding "metal" and "metals" in the
// "body" field and asserts the number of hits for each query.
147 public void testAsterisk()
149 Directory indexStore = getIndexStore("body", new String[]
150 {"metal", "metals"});
151 IndexReader reader = IndexReader.open(indexStore);
152 IndexSearcher searcher = new IndexSearcher(reader);
// query1 is a plain TermQuery used as a baseline; query2..query5 place '*' at different positions
153 Query query1 = new TermQuery(new Term("body", "metal"));
154 Query query2 = new WildcardQuery(new Term("body", "metal*"));
155 Query query3 = new WildcardQuery(new Term("body", "m*tal"));
156 Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
157 Query query5 = new WildcardQuery(new Term("body", "m*tals"));
// query6: a BooleanQuery wrapping a single optional wildcard clause
159 BooleanQuery query6 = new BooleanQuery();
160 query6.add(query5, BooleanClause.Occur.SHOULD);
// query7: a BooleanQuery combining two optional wildcard clauses
162 BooleanQuery query7 = new BooleanQuery();
163 query7.add(query3, BooleanClause.Occur.SHOULD);
164 query7.add(query5, BooleanClause.Occur.SHOULD);
166 // Queries do not automatically lower-case search terms:
167 Query query8 = new WildcardQuery(new Term("body", "M*tal*"));
// expected hit counts for each query defined above
169 assertMatches(searcher, query1, 1);
170 assertMatches(searcher, query2, 2);
171 assertMatches(searcher, query3, 1);
172 assertMatches(searcher, query4, 2);
173 assertMatches(searcher, query5, 1);
174 assertMatches(searcher, query6, 1);
175 assertMatches(searcher, query7, 2);
176 assertMatches(searcher, query8, 0);
// leading-asterisk patterns
177 assertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0);
178 assertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1);
179 assertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2);
// Builds a wildcard term in a StringBuilder using a 512-iteration loop and asserts that the
// resulting WildcardQuery matches exactly one of the indexed values "metal" and "metals".
// NOTE(review): the statements that append to `term` (including the loop body) are not
// visible in this excerpt, so the exact pattern being tested cannot be read from here.
188 public void testLotsOfAsterisks()
190 Directory indexStore = getIndexStore("body", new String[]
191 {"metal", "metals"});
// the searcher is opened directly on the Directory here, unlike the other tests which open an IndexReader first
192 IndexSearcher searcher = new IndexSearcher(indexStore, true);
193 StringBuilder term = new StringBuilder();
195 for (int i = 0; i < 512; i++)
198 Query query3 = new WildcardQuery(new Term("body", term.toString()));
200 assertMatches(searcher, query3, 1);
206 * Tests Wildcard queries with a question mark.
208 * @throws IOException if an error occurs
// Runs a set of '?' wildcard queries against an index holding "metal", "metals", "mXtals"
// and "mXtXls" in the "body" field and asserts the number of hits for each query.
210 public void testQuestionmark()
212 Directory indexStore = getIndexStore("body", new String[]
213 {"metal", "metals", "mXtals", "mXtXls"});
214 IndexReader reader = IndexReader.open(indexStore);
215 IndexSearcher searcher = new IndexSearcher(reader);
// patterns with '?' in the middle, at the end, repeated, and with an upper-case first letter
216 Query query1 = new WildcardQuery(new Term("body", "m?tal"));
217 Query query2 = new WildcardQuery(new Term("body", "metal?"));
218 Query query3 = new WildcardQuery(new Term("body", "metals?"));
219 Query query4 = new WildcardQuery(new Term("body", "m?t?ls"));
220 Query query5 = new WildcardQuery(new Term("body", "M?t?ls"));
221 Query query6 = new WildcardQuery(new Term("body", "meta??"));
// expected hit counts; query3 expects 0 because no indexed value is one character longer than "metals"
223 assertMatches(searcher, query1, 1);
224 assertMatches(searcher, query2, 1);
225 assertMatches(searcher, query3, 0);
226 assertMatches(searcher, query4, 3);
227 assertMatches(searcher, query5, 0);
228 assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal'
// Test helper: creates a new Directory and indexes one Document per entry of `contents`,
// each holding a single stored, analyzed field named `field`.
// NOTE(review): closing the writer and returning the Directory are not visible in this excerpt.
234 private Directory getIndexStore(String field, String[] contents)
236 Directory indexStore = newDirectory();
237 RandomIndexWriter writer = new RandomIndexWriter(random, indexStore);
// one document per content string, added in array order
238 for (int i = 0; i < contents.length; ++i) {
239 Document doc = new Document();
240 doc.add(newField(field, contents[i], Field.Store.YES, Field.Index.ANALYZED));
241 writer.addDocument(doc);
// Test helper: runs `q` on `searcher` with no filter, requesting up to 1000 hits, and asserts
// that the number of returned ScoreDocs equals `expectedMatches`.
248 private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches)
// the returned array holds at most 1000 entries, so expected counts must stay below that cap
250 ScoreDoc[] result = searcher.search(q, null, 1000).scoreDocs;
251 assertEquals(expectedMatches, result.length);
255 * Test that wildcard queries are parsed to the correct type and are searched correctly.
256 * This test looks at both parsing and execution of wildcard queries.
257 * Although placed here, it also tests prefix queries, verifying that
258 * prefix queries are not parsed into wildcard queries, and vice versa.
// Parses query strings with a QueryParser (leading wildcards allowed), then checks the parsed
// query class and the search results for four groups of queries: match-all, match-none,
// prefix queries hitting exactly one document, and wildcard queries hitting exactly one document.
// NOTE(review): the `docs` array used below is declared on lines not visible in this excerpt.
261 public void testParsingAndSearching() throws Exception {
262 String field = "content";
263 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, field, new MockAnalyzer(random));
// without this, query strings beginning with '*' or '?' could not be parsed
264 qp.setAllowLeadingWildcard(true);
270 // queries that should find all docs
271 String matchAll[] = {
272 "*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
274 // queries that should find no docs
275 String matchNone[] = {
276 "a*h", "a?h", "*a*h", "?a", "a?",
// in both two-dimensional tables below, row i lists queries expected to hit only document i
278 // queries that should be parsed to prefix queries
279 String matchOneDocPrefix[][] = {
280 {"a*", "ab*", "abc*", }, // these should find only doc 0
281 {"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
282 {"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
284 // queries that should be parsed to wildcard queries
285 String matchOneDocWild[][] = {
286 {"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1", "abc**"}, // these should find only doc 0
287 {"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1", "hij**"}, // these should find only doc 1
288 {"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1", "opq**"}, // these should find only doc 2
// build the index: one document per entry of `docs`, field analyzed but not stored
292 Directory dir = newDirectory();
293 RandomIndexWriter iw = new RandomIndexWriter(random, dir,
294 newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
295 .setMergePolicy(newLogMergePolicy()));
296 for (int i = 0; i < docs.length; i++) {
297 Document doc = new Document();
298 doc.add(newField(field,docs[i],Store.NO,Index.ANALYZED));
303 IndexReader reader = IndexReader.open(dir);
304 IndexSearcher searcher = new IndexSearcher(reader);
306 // test queries that must find all
307 for (int i = 0; i < matchAll.length; i++) {
308 String qtxt = matchAll[i];
309 Query q = qp.parse(qtxt);
310 if (VERBOSE) System.out.println("matchAll: qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
311 ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
312 assertEquals(docs.length,hits.length);
315 // test queries that must find none
316 for (int i = 0; i < matchNone.length; i++) {
317 String qtxt = matchNone[i];
318 Query q = qp.parse(qtxt);
319 if (VERBOSE) System.out.println("matchNone: qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
320 ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
321 assertEquals(0,hits.length);
324 // test queries that must be prefix queries and must find only one doc
325 for (int i = 0; i < matchOneDocPrefix.length; i++) {
326 for (int j = 0; j < matchOneDocPrefix[i].length; j++) {
327 String qtxt = matchOneDocPrefix[i][j];
328 Query q = qp.parse(qtxt);
329 if (VERBOSE) System.out.println("match 1 prefix: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
// the parser must produce exactly PrefixQuery, and the single hit must be document i
330 assertEquals(PrefixQuery.class, q.getClass());
331 ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
332 assertEquals(1,hits.length);
333 assertEquals(i,hits[0].doc);
337 // test queries that must be wildcard queries and must find only one doc
// NOTE(review): the outer bound uses matchOneDocPrefix.length while the body indexes
// matchOneDocWild; both tables have three rows here so the result is the same, but the
// bound should presumably be matchOneDocWild.length.
338 for (int i = 0; i < matchOneDocPrefix.length; i++) {
339 for (int j = 0; j < matchOneDocWild[i].length; j++) {
340 String qtxt = matchOneDocWild[i][j];
341 Query q = qp.parse(qtxt);
342 if (VERBOSE) System.out.println("match 1 wild: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName());
// the parser must produce exactly WildcardQuery, and the single hit must be document i
343 assertEquals(WildcardQuery.class, q.getClass());
344 ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
345 assertEquals(1,hits.length);
346 assertEquals(i,hits[0].doc);