1 package org.apache.lucene.search.payloads;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.util.LuceneTestCase;
20 import org.apache.lucene.util.English;
21 import org.apache.lucene.search.IndexSearcher;
22 import org.apache.lucene.search.QueryUtils;
23 import org.apache.lucene.search.TopDocs;
24 import org.apache.lucene.search.ScoreDoc;
25 import org.apache.lucene.search.CheckHits;
26 import org.apache.lucene.search.BooleanClause;
27 import org.apache.lucene.search.BooleanQuery;
28 import org.apache.lucene.search.DefaultSimilarity;
29 import org.apache.lucene.search.spans.SpanTermQuery;
30 import org.apache.lucene.search.spans.Spans;
31 import org.apache.lucene.search.spans.TermSpans;
32 import org.apache.lucene.analysis.Analyzer;
33 import org.apache.lucene.analysis.TokenStream;
34 import org.apache.lucene.analysis.LowerCaseTokenizer;
35 import org.apache.lucene.analysis.TokenFilter;
36 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
37 import org.apache.lucene.index.FieldInvertState;
38 import org.apache.lucene.index.IndexReader;
39 import org.apache.lucene.index.Payload;
40 import org.apache.lucene.index.RandomIndexWriter;
41 import org.apache.lucene.index.Term;
42 import org.apache.lucene.store.Directory;
43 import org.apache.lucene.document.Document;
44 import org.apache.lucene.document.Field;
46 import java.io.Reader;
47 import java.io.IOException;
54 public class TestPayloadTermQuery extends LuceneTestCase {
// Searcher/reader over the small synthetic index built in setUp().
55 private IndexSearcher searcher;
56 private IndexReader reader;
// Similarity whose score is driven entirely by payload bytes; every other
// scoring factor is neutralized to 1 (see BoostingSimilarity below).
57 private BoostingSimilarity similarity = new BoostingSimilarity();
// Payload bytes attached by PayloadFilter: "field" tokens always carry {1};
// "multiField" tokens carry {2} for even-numbered tokens and presumably {4}
// for odd ones (the else branch is not visible in this view -- confirm).
58 private byte[] payloadField = new byte[]{1};
59 private byte[] payloadMultiField1 = new byte[]{2};
60 private byte[] payloadMultiField2 = new byte[]{4};
// Index directory; protected so subclasses could reuse it.
61 protected Directory directory;
// Analyzer that lower-cases the input and chains a PayloadFilter so every
// emitted token carries a synthetic, field-dependent payload.
// NOTE(review): the trailing "return result;" and closing braces are not
// visible in this view -- lines appear elided; verify against the full file.
63 private class PayloadAnalyzer extends Analyzer {
67 public TokenStream tokenStream(String fieldName, Reader reader) {
68 TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
69 result = new PayloadFilter(result, fieldName);
// TokenFilter that stamps a payload onto each token based on the field name:
//  - "field": constant payload payloadField ({1})
//  - "multiField": payloadMultiField1 ({2}) when numSeen is even, otherwise
//    payloadMultiField2 ({4}) -- the else keyword and the numSeen increment
//    are not visible here (elided lines); confirm in the full source.
74 private class PayloadFilter extends TokenFilter {
75 private final String fieldName;
// Count of tokens seen so far; drives the even/odd payload alternation.
76 private int numSeen = 0;
78 private final PayloadAttribute payloadAtt;
80 public PayloadFilter(TokenStream input, String fieldName) {
82 this.fieldName = fieldName;
// Register the payload attribute once; reused for every token.
83 payloadAtt = addAttribute(PayloadAttribute.class);
// Advances the wrapped stream and, when a token is available, attaches the
// field-appropriate payload. NOTE(review): the "if (hasNext)" guard and the
// final "return hasNext;" are presumably on elided lines -- verify.
87 public boolean incrementToken() throws IOException {
88 boolean hasNext = input.incrementToken();
90 if (fieldName.equals("field")) {
91 payloadAtt.setPayload(new Payload(payloadField));
92 } else if (fieldName.equals("multiField")) {
93 if (numSeen % 2 == 0) {
94 payloadAtt.setPayload(new Payload(payloadMultiField1));
96 payloadAtt.setPayload(new Payload(payloadMultiField2));
// Resets the filter; presumably also resets numSeen so the even/odd payload
// alternation restarts per document (body elided in this view -- confirm).
107 public void reset() throws IOException {
// Builds a 1000-document index. Each document i stores the English spelling
// of i in three fields: a payload-free field, "field" (one copy, payload {1}
// per token), and "multiField" (two copies, alternating {2}/{4} payloads).
// NOTE(review): super.setUp(), the for-loop close, and writer.close() are on
// elided lines in this view.
114 public void setUp() throws Exception {
116 directory = newDirectory();
117 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
118 newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
119 .setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
120 //writer.infoStream = System.out;
121 for (int i = 0; i < 1000; i++) {
122 Document doc = new Document();
123 Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
124 //noPayloadField.setBoost(0);
125 doc.add(noPayloadField);
126 doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
// "multiField" holds the text twice so every term matches twice per doc.
127 doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
128 writer.addDocument(doc);
130 reader = writer.getReader();
133 searcher = newSearcher(reader);
// All scoring in these tests flows through the payload-driven similarity.
134 searcher.setSimilarity(similarity);
// Releases the searcher/reader/directory opened in setUp(); the body is on
// elided lines in this view (presumably close() calls plus super.tearDown()).
138 public void tearDown() throws Exception {
// Single-payload case: "seventy" appears once per matching doc in "field",
// whose tokens all carry payload {1}, so with every other factor forced to 1
// every hit must score exactly 1.0.
145 public void test() throws IOException {
146 PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
147 new MaxPayloadFunction());
148 TopDocs hits = searcher.search(query, null, 100);
149 assertTrue("hits is null and it shouldn't be", hits != null);
// 100 of the 1000 docs spell out a number containing "seventy".
150 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
152 //they should all have the exact same score, because they all contain seventy once, and we set
153 //all the other similarity factors to be 1
155 assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
156 for (int i = 0; i < hits.scoreDocs.length; i++) {
157 ScoreDoc doc = hits.scoreDocs[i];
158 assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
160 CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
// Sanity-check the span machinery behind the query.
161 Spans spans = query.getSpans(searcher.getIndexReader());
162 assertTrue("spans is null and it shouldn't be", spans != null);
163 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
164 /*float score = hits.score(0);
165 for (int i =1; i < hits.length(); i++)
167 assertTrue("scores are not equal and they should be", score == hits.score(i));
// Equality/hashCode contract checks: a PayloadTermQuery must be symmetric in
// (in)equality versus a plain SpanTermQuery, and two PayloadTermQuerys with
// different payload functions must compare unequal.
172 public void testQuery() {
173 PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
174 new MaxPayloadFunction());
175 QueryUtils.check(boostingFuncTermQuery);
177 SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
// Only symmetry is asserted here, not the actual truth value of equals().
179 assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
181 PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
182 new AveragePayloadFunction());
184 QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
// Multi-payload case: "multiField" holds the text twice, so "seventy" matches
// twice per hit with payloads {2}/{4}; MaxPayloadFunction keeps the larger.
// NOTE(review): the declarations/increments of numTens and count, plus the
// else branch and several closing braces, are on elided lines in this view.
187 public void testMultipleMatchesPerDoc() throws Exception {
188 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
189 new MaxPayloadFunction());
190 TopDocs hits = searcher.search(query, null, 100);
191 assertTrue("hits is null and it shouldn't be", hits != null);
192 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
194 //they should all have the exact same score, because they all contain seventy once, and we set
195 //all the other similarity factors to be 1
197 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
198 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
199 //there should be exactly 10 items that score a 4, all the rest should score a 2
200 //The 10 items are: 70 + i*100 where i in [0-9]
202 for (int i = 0; i < hits.scoreDocs.length; i++) {
203 ScoreDoc doc = hits.scoreDocs[i];
204 if (doc.doc % 10 == 0) {
206 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
208 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
211 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
212 CheckHits.checkExplanations(query, "field", searcher, true);
213 Spans spans = query.getSpans(searcher.getIndexReader());
214 assertTrue("spans is null and it shouldn't be", spans != null);
215 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
216 //should be two matches per document
218 //100 hits times 2 matches per hit, we should have 200 in count
219 while (spans.next()) {
222 assertTrue(count + " does not equal: " + 200, count == 200);
225 //Set includeSpanScore to false, in which case just the payload score comes through.
// NOTE(review): theSearcher is created with FullSimilarity but the search on
// the next visible line still uses the field `searcher` -- this looks like a
// bug (theSearcher appears unused), though elided lines may use/close it;
// confirm against the full file before changing.
// NOTE(review): numTens/count declarations and several braces are elided here.
226 public void testIgnoreSpanScorer() throws Exception {
227 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
228 new MaxPayloadFunction(), false);
230 IndexSearcher theSearcher = new IndexSearcher(directory, true);
231 theSearcher.setSimilarity(new FullSimilarity());
232 TopDocs hits = searcher.search(query, null, 100);
233 assertTrue("hits is null and it shouldn't be", hits != null);
234 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
236 //they should all have the exact same score, because they all contain seventy once, and we set
237 //all the other similarity factors to be 1
239 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
240 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
241 //there should be exactly 10 items that score a 4, all the rest should score a 2
242 //The 10 items are: 70 + i*100 where i in [0-9]
244 for (int i = 0; i < hits.scoreDocs.length; i++) {
245 ScoreDoc doc = hits.scoreDocs[i];
246 if (doc.doc % 10 == 0) {
248 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
250 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
253 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
254 CheckHits.checkExplanations(query, "field", searcher, true);
255 Spans spans = query.getSpans(searcher.getIndexReader());
256 assertTrue("spans is null and it shouldn't be", spans != null);
257 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
258 //should be two matches per document
260 //100 hits times 2 matches per hit, we should have 200 in count
261 while (spans.next()) {
// A term that occurs in no document must produce zero hits (and not throw).
267 public void testNoMatch() throws Exception {
268 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
269 new MaxPayloadFunction());
270 TopDocs hits = searcher.search(query, null, 100);
271 assertTrue("hits is null and it shouldn't be", hits != null);
272 assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
// PayloadTermQuery over a field indexed WITHOUT payloads must still work as
// a plain term query inside a BooleanQuery (MUST "zero", MUST_NOT "foo").
// NOTE(review): the query.add(c1)/query.add(c2) calls are on elided lines in
// this view; the clauses are clearly intended to be added before searching.
276 public void testNoPayload() throws Exception {
277 PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
278 new MaxPayloadFunction());
279 PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
280 new MaxPayloadFunction());
281 BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
282 BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
283 BooleanQuery query = new BooleanQuery();
286 TopDocs hits = searcher.search(query, null, 100);
287 assertTrue("hits is null and it shouldn't be", hits != null);
// Only doc 0 ("zero") contains the MUST term without the excluded one.
288 assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
289 int[] results = new int[1];
290 results[0] = 0;//hits.scoreDocs[0].doc;
291 CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
294 // must be static for weight serialization tests
295 static class BoostingSimilarity extends DefaultSimilarity {
297 // TODO: Remove warning after API has been finalized
// Scores a match purely from its payload bytes; the return statement is on
// elided lines in this view (presumably reads payload[offset], since the
// test expects scores of exactly 1/2/4 matching the payload byte values).
299 public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
300 //we know it is size 4 here, so ignore the offset/length
304 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
305 //Make everything else 1 so we see the effect of the payload
306 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Norm reduced to the raw field boost (no length normalization).
308 public float computeNorm(String fieldName, FieldInvertState state) {
309 return state.getBoost();
// The remaining factors are neutralized; their "return 1;" bodies are on
// elided lines in this view.
313 public float queryNorm(float sumOfSquaredWeights) {
318 public float sloppyFreq(int distance) {
323 public float coord(int overlap, int maxOverlap) {
328 public float idf(int docFreq, int numDocs) {
// tf is flattened to 0/1 so term frequency cannot inflate scores.
333 public float tf(float freq) {
334 return freq == 0 ? 0 : 1;
338 static class FullSimilarity extends DefaultSimilarity{
339 public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
340 //we know it is size 4 here, so ignore the offset/length