1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.util.LuceneTestCase;
21 import org.apache.lucene.analysis.MockAnalyzer;
22 import org.apache.lucene.analysis.WhitespaceAnalyzer;
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.apache.lucene.index.IndexReader;
26 import org.apache.lucene.index.FieldInvertState;
27 import org.apache.lucene.index.RandomIndexWriter;
28 import org.apache.lucene.index.Term;
29 import org.apache.lucene.store.Directory;
31 import java.text.DecimalFormat;
32 import java.io.IOException;
35 * Test of the DisjunctionMaxQuery.
38 public class TestDisjunctionMaxQuery extends LuceneTestCase {
40 /** Threshold for comparing floats; note that the value is exactly zero. */
41 public static final float SCORE_COMP_THRESH = 0.0000f;
44 * Similarity to eliminate tf, idf and lengthNorm effects to isolate test
48 * same as TestRankingSimilarity in TestRanking.zip from
49 * http://issues.apache.org/jira/browse/LUCENE-323
// Similarity subclass used by this test. It overrides tf, computeNorm and
// idf; several body lines of those overrides are not visible in this excerpt.
52 private static class TestSimilarity extends DefaultSimilarity {
// No-argument constructor with an empty body.
54 public TestSimilarity() {}
// Returns 1.0 for any strictly positive frequency, regardless of how large
// it is (the branch for non-positive freq is not visible here).
57 public float tf(float freq) {
58 if (freq > 0.0f) return 1.0f;
// The norm is simply the boost reported by the invert state; fieldName is
// not used in the visible code.
63 public float computeNorm(String fieldName, FieldInvertState state) {
64 // Disable length norm
65 return state.getBoost();
// idf override; its return statement is not visible in this excerpt.
69 public float idf(int docFreq, int numDocs) {
// Similarity instance handed to both the index writer config and the searcher in setUp.
74 public Similarity sim = new TestSimilarity();
// Directory holding the test index; assigned in setUp.
75 public Directory index;
// Searcher that the test methods run their queries against.
// NOTE(review): setUp and the tests also use a field named r, whose
// declaration is not visible in this excerpt.
77 public IndexSearcher s;
// Builds the fixture index: creates a directory, writes four documents
// (ids d1..d4) through a RandomIndexWriter configured with the test
// similarity, then obtains a reader from the writer and sets the same
// similarity on the searcher.
// Several lines are not visible in this excerpt: the receivers of the
// dangling ".add(" calls, the remainder of some add(...) statements, and the
// statement that assigns the searcher s.
80 public void setUp() throws Exception {
83 index = newDirectory();
84 RandomIndexWriter writer = new RandomIndexWriter(random, index,
85 newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))
86 .setSimilarity(sim).setMergePolicy(newLogMergePolicy()));
88 // hed is the most important field, dek is secondary
90 // d1 is an "ok" match for: albino elephant
// Visible fields for d1: id=d1, hed=elephant, dek=elephant.
92 Document d1 = new Document();
93 d1.add(newField("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
96 .add(newField("hed", "elephant", Field.Store.YES,
97 Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
99 .add(newField("dek", "elephant", Field.Store.YES,
100 Field.Index.ANALYZED));// Field.Text("dek", "elephant"));
101 writer.addDocument(d1);
104 // d2 is a "good" match for: albino elephant
// Visible fields for d2: id=d2, hed=elephant, dek=albino, dek=elephant.
106 Document d2 = new Document();
107 d2.add(newField("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
110 .add(newField("hed", "elephant", Field.Store.YES,
111 Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
112 d2.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek",
115 .add(newField("dek", "elephant", Field.Store.YES,
116 Field.Index.ANALYZED));// Field.Text("dek", "elephant"));
117 writer.addDocument(d2);
120 // d3 is a "better" match for: albino elephant
// Visible fields for d3: id=d3, hed=albino, hed=elephant (no dek field is visible).
122 Document d3 = new Document();
123 d3.add(newField("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
125 d3.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed",
128 .add(newField("hed", "elephant", Field.Store.YES,
129 Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
130 writer.addDocument(d3);
133 // d4 is the "best" match for: albino elephant
// Visible fields for d4: id=d4, hed=albino, hed=elephant, dek=albino.
135 Document d4 = new Document();
136 d4.add(newField("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED));// Field.Keyword("id",
138 d4.add(newField("hed", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("hed",
141 .add(newField("hed", "elephant", Field.Store.YES,
142 Field.Index.ANALYZED));// Field.Text("hed", "elephant"));
143 d4.add(newField("dek", "albino", Field.Store.YES, Field.Index.ANALYZED));// Field.Text("dek",
145 writer.addDocument(d4);
// Reader obtained from the writer; the tests use it to load stored documents.
149 r = writer.getReader();
// Search with the same similarity that was used at index time.
152 s.setSimilarity(sim);
// Fixture teardown hook; its body is not visible in this excerpt.
156 public void tearDown() throws Exception {
// Advances a fresh DisjunctionMaxQuery scorer (id:d1 OR dek:DOES_NOT_EXIST,
// tie breaker 0.0) straight to target doc 3, and reports a failure naming the
// document the scorer landed on. The condition guarding fail(...) is not
// visible in this excerpt; presumably it is skipOk.
163 public void testSkipToFirsttimeMiss() throws IOException {
164 final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
165 dq.add(tq("id", "d1"));
166 dq.add(tq("dek", "DOES_NOT_EXIST"));
168 QueryUtils.check(random, dq, s);
170 final Weight dw = s.createNormalizedWeight(dq);
// Use the first sequential sub-reader when the top-level reader has any,
// otherwise the top-level reader itself.
171 IndexReader sub = s.getIndexReader().getSequentialSubReaders() == null ?
172 s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0];
173 final Scorer ds = dw.scorer(sub, true, false);
// skipOk is true when advance(3) found a document.
174 final boolean skipOk = ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS;
// NOTE(review): the scorer is built on sub, but the document is loaded from
// r; confirm that both use the same doc id space.
176 fail("firsttime skipTo found a match? ... "
177 + r.document(ds.docID()).get("id"));
// Advances a fresh DisjunctionMaxQuery scorer (dek:albino OR
// dek:DOES_NOT_EXIST, tie breaker 0.0) straight to target doc 3, and asserts
// that it finds a document whose stored id is "d4".
181 public void testSkipToFirsttimeHit() throws IOException {
182 final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
183 dq.add(tq("dek", "albino"));
184 dq.add(tq("dek", "DOES_NOT_EXIST"));
186 QueryUtils.check(random, dq, s);
188 final Weight dw = s.createNormalizedWeight(dq);
// Use the first sequential sub-reader when the top-level reader has any,
// otherwise the top-level reader itself.
189 IndexReader sub = s.getIndexReader().getSequentialSubReaders() == null ?
190 s.getIndexReader() : s.getIndexReader().getSequentialSubReaders()[0];
191 final Scorer ds = dw.scorer(sub, true, false);
192 assertTrue("firsttime skipTo found no match",
193 ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS);
// NOTE(review): the scorer is built on sub, but the document is loaded from
// r; confirm that both use the same doc id space.
194 assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id"));
// Queries hed:albino OR hed:elephant as a DisjunctionMaxQuery with tie
// breaker 0.0, expects 4 hits, and compares every hit's score with the first
// hit's score. The final argument of the score assertEquals and the control
// flow around printHits are not visible in this excerpt.
197 public void testSimpleEqualScores1() throws Exception {
199 DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
200 q.add(tq("hed", "albino"));
201 q.add(tq("hed", "elephant"));
202 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
204 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
207 assertEquals("all docs should match " + q.toString(), 4, h.length);
// The top hit's score is the reference; the loop starts at index 1.
209 float score = h[0].score;
210 for (int i = 1; i < h.length; i++) {
211 assertEquals("score #" + i + " is not the same", score, h[i].score,
215 printHits("testSimpleEqualScores1", h, s);
// Queries dek:albino OR dek:elephant as a DisjunctionMaxQuery with tie
// breaker 0.0, expects 3 hits, and compares every hit's score with the first
// hit's score. The final argument of the score assertEquals and the control
// flow around printHits are not visible in this excerpt.
221 public void testSimpleEqualScores2() throws Exception {
223 DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
224 q.add(tq("dek", "albino"));
225 q.add(tq("dek", "elephant"));
226 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
228 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
231 assertEquals("3 docs should match " + q.toString(), 3, h.length);
// The top hit's score is the reference; the loop starts at index 1.
232 float score = h[0].score;
233 for (int i = 1; i < h.length; i++) {
234 assertEquals("score #" + i + " is not the same", score, h[i].score,
238 printHits("testSimpleEqualScores2", h, s);
// Queries albino and elephant in both hed and dek as one DisjunctionMaxQuery
// with tie breaker 0.0, expects 4 hits, and compares every hit's score with
// the first hit's score. The final argument of the score assertEquals and the
// control flow around printHits are not visible in this excerpt.
244 public void testSimpleEqualScores3() throws Exception {
246 DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
247 q.add(tq("hed", "albino"));
248 q.add(tq("hed", "elephant"));
249 q.add(tq("dek", "albino"));
250 q.add(tq("dek", "elephant"));
251 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
253 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
256 assertEquals("all docs should match " + q.toString(), 4, h.length);
// The top hit's score is the reference; the loop starts at index 1.
257 float score = h[0].score;
258 for (int i = 1; i < h.length; i++) {
259 assertEquals("score #" + i + " is not the same", score, h[i].score,
263 printHits("testSimpleEqualScores3", h, s);
// Queries dek:albino OR dek:elephant as a DisjunctionMaxQuery with tie
// breaker 0.01. Expects 3 hits with d2 first, the first score strictly
// greater than the second, and the second and third scores compared for
// equality. The final argument of that last assertEquals and the control flow
// around printHits are not visible in this excerpt.
269 public void testSimpleTiebreaker() throws Exception {
271 DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f);
272 q.add(tq("dek", "albino"));
273 q.add(tq("dek", "elephant"));
274 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
276 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
279 assertEquals("3 docs should match " + q.toString(), 3, h.length);
// d2 is the only document indexed with both albino and elephant in dek.
280 assertEquals("wrong first", "d2", s.doc(h[0].doc).get("id"));
281 float score0 = h[0].score;
282 float score1 = h[1].score;
283 float score2 = h[2].score;
284 assertTrue("d2 does not have better score then others: " + score0
285 + " >? " + score1, score0 > score1);
286 assertEquals("d4 and d1 don't have equal scores", score1, score2,
289 printHits("testSimpleTiebreaker", h, s);
// Builds a BooleanQuery with two MUST clauses, each a DisjunctionMaxQuery
// with tie breaker 0.0: albino in hed or dek, and elephant in hed or dek.
// Expects 3 hits and compares every hit's score with the first hit's score.
// The final argument of the score assertEquals and the control flow around
// printHits are not visible in this excerpt.
294 public void testBooleanRequiredEqualScores() throws Exception {
296 BooleanQuery q = new BooleanQuery();
// Clause 1: albino in either field.
298 DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
299 q1.add(tq("hed", "albino"));
300 q1.add(tq("dek", "albino"));
301 q.add(q1, BooleanClause.Occur.MUST);// true,false);
302 QueryUtils.check(random, q1, s);
// Clause 2: elephant in either field.
306 DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
307 q2.add(tq("hed", "elephant"));
308 q2.add(tq("dek", "elephant"));
309 q.add(q2, BooleanClause.Occur.MUST);// true,false);
310 QueryUtils.check(random, q2, s);
313 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
315 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
318 assertEquals("3 docs should match " + q.toString(), 3, h.length);
// The top hit's score is the reference; the loop starts at index 1.
319 float score = h[0].score;
320 for (int i = 1; i < h.length; i++) {
321 assertEquals("score #" + i + " is not the same", score, h[i].score,
// NOTE(review): the label has a trailing "1" that the method name lacks.
325 printHits("testBooleanRequiredEqualScores1", h, s);
// Builds a BooleanQuery with two SHOULD clauses, each a DisjunctionMaxQuery
// with tie breaker 0.0: albino in hed or dek, and elephant in hed or dek.
// Expects 4 hits: every hit except the last is compared with the first hit's
// score, and the last hit must be d1 with a strictly lower score.
// The final argument of the score assertEquals and the control flow around
// printHits are not visible in this excerpt.
330 public void testBooleanOptionalNoTiebreaker() throws Exception {
332 BooleanQuery q = new BooleanQuery();
// Clause 1: albino in either field.
334 DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
335 q1.add(tq("hed", "albino"));
336 q1.add(tq("dek", "albino"));
337 q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
// Clause 2: elephant in either field.
340 DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
341 q2.add(tq("hed", "elephant"));
342 q2.add(tq("dek", "elephant"));
343 q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
345 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
347 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
350 assertEquals("4 docs should match " + q.toString(), 4, h.length);
351 float score = h[0].score;
// The loop deliberately stops one short of the end; the last hit is
// checked separately below.
352 for (int i = 1; i < h.length - 1; i++) { /* note: -1 */
353 assertEquals("score #" + i + " is not the same", score, h[i].score,
// d1 has no visible albino field, so only the elephant clause can match it.
356 assertEquals("wrong last", "d1", s.doc(h[h.length - 1].doc).get("id"));
357 float score1 = h[h.length - 1].score;
358 assertTrue("d1 does not have worse score then others: " + score + " >? "
359 + score1, score > score1);
361 printHits("testBooleanOptionalNoTiebreaker", h, s);
// Builds a BooleanQuery with two SHOULD clauses, each a DisjunctionMaxQuery
// with tie breaker 0.01: albino in hed or dek, and elephant in hed or dek.
// Expects 4 hits ranked as: d2 and d4 in either order with compared-equal
// scores, then d3 with a strictly lower score, then d1 lower again.
// The final argument of the score assertEquals and the control flow around
// printHits are not visible in this excerpt.
366 public void testBooleanOptionalWithTiebreaker() throws Exception {
368 BooleanQuery q = new BooleanQuery();
// Clause 1: albino in either field.
370 DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
371 q1.add(tq("hed", "albino"));
372 q1.add(tq("dek", "albino"));
373 q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
// Clause 2: elephant in either field.
376 DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
377 q2.add(tq("hed", "elephant"));
378 q2.add(tq("dek", "elephant"));
379 q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
381 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
383 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
387 assertEquals("4 docs should match " + q.toString(), 4, h.length);
// Scores and stored ids of the four hits, in rank order.
389 float score0 = h[0].score;
390 float score1 = h[1].score;
391 float score2 = h[2].score;
392 float score3 = h[3].score;
394 String doc0 = s.doc(h[0].doc).get("id");
395 String doc1 = s.doc(h[1].doc).get("id");
396 String doc2 = s.doc(h[2].doc).get("id");
397 String doc3 = s.doc(h[3].doc).get("id");
// The top two positions may hold d2 and d4 in either order.
399 assertTrue("doc0 should be d2 or d4: " + doc0, doc0.equals("d2")
400 || doc0.equals("d4"));
// NOTE(review): this message prints doc0 although the condition checks
// doc1; looks like a copy/paste slip in the diagnostic text.
401 assertTrue("doc1 should be d2 or d4: " + doc0, doc1.equals("d2")
402 || doc1.equals("d4"));
403 assertEquals("score0 and score1 should match", score0, score1,
405 assertEquals("wrong third", "d3", doc2);
406 assertTrue("d3 does not have worse score then d2 and d4: " + score1
407 + " >? " + score2, score1 > score2);
409 assertEquals("wrong fourth", "d1", doc3);
410 assertTrue("d1 does not have worse score then d3: " + score2 + " >? "
411 + score3, score2 > score3);
414 printHits("testBooleanOptionalWithTiebreaker", h, s);
// Same query shape as the tie-breaker test (two SHOULD DisjunctionMaxQuery
// clauses with tie breaker 0.01), but the hed term queries are built with
// the three-argument tq and a value of 1.5. Expects the strict ranking
// d4 > d3 > d2 > d1, each score strictly greater than the next.
// The control flow around printHits is not visible in this excerpt.
420 public void testBooleanOptionalWithTiebreakerAndBoost() throws Exception {
422 BooleanQuery q = new BooleanQuery();
// Clause 1: albino in either field, with 1.5 passed for the hed term.
424 DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
425 q1.add(tq("hed", "albino", 1.5f));
426 q1.add(tq("dek", "albino"));
427 q.add(q1, BooleanClause.Occur.SHOULD);// false,false);
// Clause 2: elephant in either field, with 1.5 passed for the hed term.
430 DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
431 q2.add(tq("hed", "elephant", 1.5f));
432 q2.add(tq("dek", "elephant"));
433 q.add(q2, BooleanClause.Occur.SHOULD);// false,false);
435 QueryUtils.check(random, q, s);
// Ask for up to 1000 hits with a null filter.
437 ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
441 assertEquals("4 docs should match " + q.toString(), 4, h.length);
// Scores and stored ids of the four hits, in rank order.
443 float score0 = h[0].score;
444 float score1 = h[1].score;
445 float score2 = h[2].score;
446 float score3 = h[3].score;
448 String doc0 = s.doc(h[0].doc).get("id");
449 String doc1 = s.doc(h[1].doc).get("id");
450 String doc2 = s.doc(h[2].doc).get("id");
451 String doc3 = s.doc(h[3].doc).get("id");
453 assertEquals("doc0 should be d4: ", "d4", doc0);
454 assertEquals("doc1 should be d3: ", "d3", doc1);
455 assertEquals("doc2 should be d2: ", "d2", doc2);
456 assertEquals("doc3 should be d1: ", "d1", doc3);
458 assertTrue("d4 does not have a better score then d3: " + score0 + " >? "
459 + score1, score0 > score1);
460 assertTrue("d3 does not have a better score then d2: " + score1 + " >? "
461 + score2, score1 > score2);
// NOTE(review): this compares score2 (d2) with score3 (d1), but the
// message names d3; the diagnostic text looks like a copy/paste slip.
462 assertTrue("d3 does not have a better score then d1: " + score2 + " >? "
463 + score3, score2 > score3);
466 printHits("testBooleanOptionalWithTiebreakerAndBoost", h, s);
// Convenience factory: returns a TermQuery for term text t in field f.
472 protected Query tq(String f, String t) {
473 return new TermQuery(new Term(f, t));
// Overload taking an extra float b; its body is not visible in this excerpt.
// Presumably b is applied as the query boost. TODO confirm against the full file.
477 protected Query tq(String f, String t, float b) {
// Debug helper: writes a banner with the test name to System.err, then one
// line per hit with its rank index, score and stored id. The rest of the
// signature, the receiver of the final println and the closing lines are not
// visible in this excerpt.
483 protected void printHits(String test, ScoreDoc[] h, Searcher searcher)
486 System.err.println("------- " + test + " -------");
// Scores are formatted with nine decimal places.
488 DecimalFormat f = new DecimalFormat("0.000000000");
490 for (int i = 0; i < h.length; i++) {
// Load the stored document for this hit to read its id.
491 Document d = searcher.doc(h[i].doc);
492 float score = h[i].score;
494 .println("#" + i + ": " + f.format(score) + " - " + d.get("id"));