package org.apache.lucene.search.payloads;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
54 public class TestPayloadTermQuery extends LuceneTestCase {
55 private IndexSearcher searcher;
56 private IndexReader reader;
57 private BoostingSimilarity similarity = new BoostingSimilarity();
58 private byte[] payloadField = new byte[]{1};
59 private byte[] payloadMultiField1 = new byte[]{2};
60 private byte[] payloadMultiField2 = new byte[]{4};
61 protected Directory directory;
63 private class PayloadAnalyzer extends Analyzer {
67 public TokenStream tokenStream(String fieldName, Reader reader) {
68 TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
69 result = new PayloadFilter(result, fieldName);
74 private class PayloadFilter extends TokenFilter {
78 PayloadAttribute payloadAtt;
80 public PayloadFilter(TokenStream input, String fieldName) {
82 this.fieldName = fieldName;
83 payloadAtt = addAttribute(PayloadAttribute.class);
87 public boolean incrementToken() throws IOException {
88 boolean hasNext = input.incrementToken();
90 if (fieldName.equals("field")) {
91 payloadAtt.setPayload(new Payload(payloadField));
92 } else if (fieldName.equals("multiField")) {
93 if (numSeen % 2 == 0) {
94 payloadAtt.setPayload(new Payload(payloadMultiField1));
96 payloadAtt.setPayload(new Payload(payloadMultiField2));
108 public void setUp() throws Exception {
110 directory = newDirectory();
111 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
112 newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
113 .setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
114 //writer.infoStream = System.out;
115 for (int i = 0; i < 1000; i++) {
116 Document doc = new Document();
117 Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
118 //noPayloadField.setBoost(0);
119 doc.add(noPayloadField);
120 doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
121 doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
122 writer.addDocument(doc);
124 reader = writer.getReader();
127 searcher = newSearcher(reader);
128 searcher.setSimilarity(similarity);
132 public void tearDown() throws Exception {
139 public void test() throws IOException {
140 PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
141 new MaxPayloadFunction());
142 TopDocs hits = searcher.search(query, null, 100);
143 assertTrue("hits is null and it shouldn't be", hits != null);
144 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
146 //they should all have the exact same score, because they all contain seventy once, and we set
147 //all the other similarity factors to be 1
149 assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
150 for (int i = 0; i < hits.scoreDocs.length; i++) {
151 ScoreDoc doc = hits.scoreDocs[i];
152 assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
154 CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
155 Spans spans = query.getSpans(searcher.getIndexReader());
156 assertTrue("spans is null and it shouldn't be", spans != null);
157 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
158 /*float score = hits.score(0);
159 for (int i =1; i < hits.length(); i++)
161 assertTrue("scores are not equal and they should be", score == hits.score(i));
166 public void testQuery() {
167 PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
168 new MaxPayloadFunction());
169 QueryUtils.check(boostingFuncTermQuery);
171 SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
173 assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
175 PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
176 new AveragePayloadFunction());
178 QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
181 public void testMultipleMatchesPerDoc() throws Exception {
182 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
183 new MaxPayloadFunction());
184 TopDocs hits = searcher.search(query, null, 100);
185 assertTrue("hits is null and it shouldn't be", hits != null);
186 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
188 //they should all have the exact same score, because they all contain seventy once, and we set
189 //all the other similarity factors to be 1
191 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
192 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
193 //there should be exactly 10 items that score a 4, all the rest should score a 2
194 //The 10 items are: 70 + i*100 where i in [0-9]
196 for (int i = 0; i < hits.scoreDocs.length; i++) {
197 ScoreDoc doc = hits.scoreDocs[i];
198 if (doc.doc % 10 == 0) {
200 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
202 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
205 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
206 CheckHits.checkExplanations(query, "field", searcher, true);
207 Spans spans = query.getSpans(searcher.getIndexReader());
208 assertTrue("spans is null and it shouldn't be", spans != null);
209 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
210 //should be two matches per document
212 //100 hits times 2 matches per hit, we should have 200 in count
213 while (spans.next()) {
216 assertTrue(count + " does not equal: " + 200, count == 200);
219 //Set includeSpanScore to false, in which case just the payload score comes through.
220 public void testIgnoreSpanScorer() throws Exception {
221 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
222 new MaxPayloadFunction(), false);
224 IndexSearcher theSearcher = new IndexSearcher(directory, true);
225 theSearcher.setSimilarity(new FullSimilarity());
226 TopDocs hits = searcher.search(query, null, 100);
227 assertTrue("hits is null and it shouldn't be", hits != null);
228 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
230 //they should all have the exact same score, because they all contain seventy once, and we set
231 //all the other similarity factors to be 1
233 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
234 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
235 //there should be exactly 10 items that score a 4, all the rest should score a 2
236 //The 10 items are: 70 + i*100 where i in [0-9]
238 for (int i = 0; i < hits.scoreDocs.length; i++) {
239 ScoreDoc doc = hits.scoreDocs[i];
240 if (doc.doc % 10 == 0) {
242 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
244 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
247 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
248 CheckHits.checkExplanations(query, "field", searcher, true);
249 Spans spans = query.getSpans(searcher.getIndexReader());
250 assertTrue("spans is null and it shouldn't be", spans != null);
251 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
252 //should be two matches per document
254 //100 hits times 2 matches per hit, we should have 200 in count
255 while (spans.next()) {
261 public void testNoMatch() throws Exception {
262 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
263 new MaxPayloadFunction());
264 TopDocs hits = searcher.search(query, null, 100);
265 assertTrue("hits is null and it shouldn't be", hits != null);
266 assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
270 public void testNoPayload() throws Exception {
271 PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
272 new MaxPayloadFunction());
273 PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
274 new MaxPayloadFunction());
275 BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
276 BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
277 BooleanQuery query = new BooleanQuery();
280 TopDocs hits = searcher.search(query, null, 100);
281 assertTrue("hits is null and it shouldn't be", hits != null);
282 assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
283 int[] results = new int[1];
284 results[0] = 0;//hits.scoreDocs[0].doc;
285 CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
288 // must be static for weight serialization tests
289 static class BoostingSimilarity extends DefaultSimilarity {
291 // TODO: Remove warning after API has been finalized
293 public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
294 //we know it is size 4 here, so ignore the offset/length
298 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
299 //Make everything else 1 so we see the effect of the payload
300 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
302 public float computeNorm(String fieldName, FieldInvertState state) {
303 return state.getBoost();
307 public float queryNorm(float sumOfSquaredWeights) {
312 public float sloppyFreq(int distance) {
317 public float coord(int overlap, int maxOverlap) {
322 public float idf(int docFreq, int numDocs) {
327 public float tf(float freq) {
328 return freq == 0 ? 0 : 1;
332 static class FullSimilarity extends DefaultSimilarity{
333 public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
334 //we know it is size 4 here, so ignore the offset/length