1 package org.apache.lucene.search.payloads;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 import java.io.IOException;
19 import java.io.Reader;
20 import java.util.Collection;
22 import org.apache.lucene.analysis.Analyzer;
23 import org.apache.lucene.analysis.LowerCaseTokenizer;
24 import org.apache.lucene.analysis.TokenFilter;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.index.FieldInvertState;
30 import org.apache.lucene.index.IndexReader;
31 import org.apache.lucene.index.Payload;
32 import org.apache.lucene.index.RandomIndexWriter;
33 import org.apache.lucene.index.Term;
34 import org.apache.lucene.search.DefaultSimilarity;
35 import org.apache.lucene.search.Explanation;
36 import org.apache.lucene.search.IndexSearcher;
37 import org.apache.lucene.search.QueryUtils;
38 import org.apache.lucene.search.ScoreDoc;
39 import org.apache.lucene.search.Searcher;
40 import org.apache.lucene.search.TopDocs;
41 import org.apache.lucene.search.spans.SpanQuery;
42 import org.apache.lucene.search.spans.SpanNearQuery;
43 import org.apache.lucene.search.spans.SpanTermQuery;
44 import org.apache.lucene.store.Directory;
45 import org.apache.lucene.util.English;
46 import org.apache.lucene.util.LuceneTestCase;
47 import org.apache.lucene.search.Explanation.IDFExplanation;
48 import org.junit.AfterClass;
49 import org.junit.BeforeClass;
// Tests for PayloadNearQuery, run against fixtures that are shared by all test methods.
52 public class TestPayloadNearQuery extends LuceneTestCase {
// Shared fixtures; directory, reader and searcher are assigned in beforeClass().
53 private static IndexSearcher searcher;
54 private static IndexReader reader;
55 private static Directory directory;
// Similarity that beforeClass() installs on both the index writer config and the searcher.
56 private static BoostingSimilarity similarity = new BoostingSimilarity();
// One-byte payloads set on tokens by PayloadFilter (payload2 is used when its counter is even).
57 private static byte[] payload2 = new byte[]{2};
58 private static byte[] payload4 = new byte[]{4};
// Analyzer passed to the index writer config in beforeClass(): lower-case tokenization wrapped in a PayloadFilter.
60 private static class PayloadAnalyzer extends Analyzer {
// Chains a LowerCaseTokenizer over the reader into a PayloadFilter for the given field.
// NOTE(review): the return statement is not visible in this listing.
62 public TokenStream tokenStream(String fieldName, Reader reader) {
63 TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
// The field name is handed to the filter together with the stream.
64 result = new PayloadFilter(result, fieldName);
// Token filter that sets a payload on the tokens produced by the wrapped stream.
69 private static class PayloadFilter extends TokenFilter {
// Stored from the constructor argument; not read in the lines visible here.
70 private final String fieldName;
// Counter whose parity selects the payload in incrementToken().
71 private int numSeen = 0;
// Attribute through which the payload is written onto the current token.
72 private final PayloadAttribute payAtt;
// Records the field name and registers the PayloadAttribute this filter writes to.
74 public PayloadFilter(TokenStream input, String fieldName) {
76 this.fieldName = fieldName;
77 payAtt = addAttribute(PayloadAttribute.class);
// Advances the wrapped stream and, when a token is available, sets its payload.
// NOTE(review): the lines that update numSeen/result and return are not visible in this listing.
81 public boolean incrementToken() throws IOException {
82 boolean result = false;
83 if (input.incrementToken()) {
// An even numSeen selects payload2.
84 if (numSeen % 2 == 0) {
85 payAtt.setPayload(new Payload(payload2));
// NOTE(review): the branch header for the next line is not visible; presumably this is the odd-numSeen case - confirm.
87 payAtt.setPayload(new Payload(payload4));
// NOTE(review): body not visible in this listing; presumably it restores numSeen so the filter can be reused - confirm.
96 public void reset() throws IOException {
// Builds a PayloadNearQuery from a phrase: one SpanTermQuery per word on fieldName,
// combined with slop 0, the caller's inOrder flag and the caller's payload function.
102 private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
// Words are separated by one or more whitespace characters.
103 String[] words = phrase.split("[\\s]+");
104 SpanQuery clauses[] = new SpanQuery[words.length];
105 for (int i=0;i<clauses.length;i++) {
106 clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
108 return new PayloadNearQuery(clauses, 0, inOrder, function);
// Setup: indexes 1000 documents through PayloadAnalyzer and opens the shared reader and searcher.
112 public static void beforeClass() throws Exception {
113 directory = newDirectory();
// The writer is configured with PayloadAnalyzer and the shared similarity.
114 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
115 newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
116 .setSimilarity(similarity));
117 //writer.infoStream = System.out;
// Document i stores intToEnglish(i) in "field" and intToEnglish(i) + ' ' + intToEnglish(i+1) in "field2".
118 for (int i = 0; i < 1000; i++) {
119 Document doc = new Document();
120 doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
121 String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1);
122 doc.add(newField("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
123 writer.addDocument(doc);
// The reader is obtained from the writer itself.
125 reader = writer.getReader();
128 searcher = newSearcher(reader);
// Search with the same similarity instance that was set on the writer config.
129 searcher.setSimilarity(similarity);
// Teardown counterpart of beforeClass().
// NOTE(review): the body is not visible in this listing; presumably it releases searcher, reader and directory - confirm.
133 public static void afterClass() throws Exception {
// Checks AveragePayloadFunction scoring on "field": first the ordered phrase "twenty two",
// then "<n> hundred" for n = 1..9. Every hit is expected to score exactly 3.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
142 public void test() throws IOException {
143 PayloadNearQuery query;
146 query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
147 QueryUtils.check(query);
149 // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
150 // and all the similarity factors are set to 1
151 hits = searcher.search(query, null, 100);
152 assertTrue("hits is null and it shouldn't be", hits != null);
153 assertTrue("should be 10 hits", hits.totalHits == 10);
154 for (int j = 0; j < hits.scoreDocs.length; j++) {
155 ScoreDoc doc = hits.scoreDocs[j];
156 assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
// Repeat the score check for each "<i> hundred" phrase; each must match exactly 100 documents.
158 for (int i=1;i<10;i++) {
159 query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
160 // all should have score = 3 because adjacent terms have payloads of 2,4
161 // and all the similarity factors are set to 1
162 hits = searcher.search(query, null, 100);
163 assertTrue("hits is null and it shouldn't be", hits != null);
164 assertTrue("should be 100 hits", hits.totalHits == 100);
165 for (int j = 0; j < hits.scoreDocs.length; j++) {
166 ScoreDoc doc = hits.scoreDocs[j];
167 // System.out.println("Doc: " + doc.toString());
168 // System.out.println("Explain: " + searcher.explain(query, doc.doc));
169 assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
// Wraps two SpanNearQuery clauses over "field2" in a PayloadNearQuery (slop 10, inOrder = false)
// and expects 12 matching documents.
175 public void testPayloadNear() throws IOException {
176 SpanNearQuery q1, q2;
177 PayloadNearQuery query;
178 //SpanNearQuery(clauses, 10000, false)
179 q1 = spanNearQuery("field2", "twenty two");
180 q2 = spanNearQuery("field2", "twenty three");
// NOTE(review): the lines that store q1 and q2 into 'clauses' are not visible in this listing.
181 SpanQuery[] clauses = new SpanQuery[2];
184 query = new PayloadNearQuery(clauses, 10, false);
185 //System.out.println(query.toString());
186 assertEquals(12, searcher.search(query, null, 100).totalHits);
// NOTE(review): 'hits' is declared/assigned on a line not visible here.
// The loop below only prints each hit to stdout; no score is asserted.
188 System.out.println(hits.totalHits);
189 for (int j = 0; j < hits.scoreDocs.length; j++) {
190 ScoreDoc doc = hits.scoreDocs[j];
191 System.out.println("doc: "+doc.doc+", score: "+doc.score);
// "twenty two" with AveragePayloadFunction: expects 10 hits, each scoring 3, and an explanation
// that names the function and reports the same value.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
196 public void testAverageFunction() throws IOException {
197 PayloadNearQuery query;
200 query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
201 QueryUtils.check(query);
202 // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
203 // and all the similarity factors are set to 1
204 hits = searcher.search(query, null, 100);
205 assertTrue("hits is null and it shouldn't be", hits != null);
206 assertTrue("should be 10 hits", hits.totalHits == 10);
207 for (int j = 0; j < hits.scoreDocs.length; j++) {
208 ScoreDoc doc = hits.scoreDocs[j];
209 assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
// The explanation text must mention the payload function and its value must equal 3.
210 Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
211 String exp = explain.toString();
212 assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
213 assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
// "twenty two" with MaxPayloadFunction: expects 10 hits, each scoring 4, and an explanation
// that names the function and reports the same value.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
216 public void testMaxFunction() throws IOException {
217 PayloadNearQuery query;
220 query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
221 QueryUtils.check(query);
222 // all 10 hits should have score = 4 (max payload value)
223 hits = searcher.search(query, null, 100);
224 assertTrue("hits is null and it shouldn't be", hits != null);
225 assertTrue("should be 10 hits", hits.totalHits == 10);
226 for (int j = 0; j < hits.scoreDocs.length; j++) {
227 ScoreDoc doc = hits.scoreDocs[j];
228 assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
// The explanation text must mention the payload function and its value must equal 4.
229 Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
230 String exp = explain.toString();
231 assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
232 assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
// "twenty two" with MinPayloadFunction: expects 10 hits, each scoring 2, and an explanation
// that names the function and reports the same value.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
235 public void testMinFunction() throws IOException {
236 PayloadNearQuery query;
239 query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
240 QueryUtils.check(query);
241 // all 10 hits should have score = 2 (min payload value)
242 hits = searcher.search(query, null, 100);
243 assertTrue("hits is null and it shouldn't be", hits != null);
244 assertTrue("should be 10 hits", hits.totalHits == 10);
245 for (int j = 0; j < hits.scoreDocs.length; j++) {
246 ScoreDoc doc = hits.scoreDocs[j];
247 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
// The explanation text must mention the payload function and its value must equal 2.
248 Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
249 String exp = explain.toString();
250 assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
251 assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
// Creates the same two "field2" near queries as testPayloadNear() and allocates a two-slot clause array.
// NOTE(review): the lines that fill and return the array are not visible, and no caller appears in this listing - confirm it is still used.
254 private SpanQuery[] getClauses() {
255 SpanNearQuery q1, q2;
256 q1 = spanNearQuery("field2", "twenty two");
257 q2 = spanNearQuery("field2", "twenty three");
258 SpanQuery[] clauses = new SpanQuery[2];
// Builds a SpanNearQuery (slop 10000, inOrder = false) with one PayloadTermQuery per word of 'words',
// each created on fieldName with an AveragePayloadFunction.
263 private SpanNearQuery spanNearQuery(String fieldName, String words) {
// Words are separated by one or more whitespace characters.
264 String[] wordList = words.split("[\\s]+");
265 SpanQuery clauses[] = new SpanQuery[wordList.length];
266 for (int i=0;i<clauses.length;i++) {
267 clauses[i] = new PayloadTermQuery(new Term(fieldName, wordList[i]), new AveragePayloadFunction());
269 return new SpanNearQuery(clauses, 10000, false);
// Four-word ordered phrase "nine hundred ninety nine" on "field": expects exactly one hit with score 3.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
272 public void testLongerSpan() throws IOException {
273 PayloadNearQuery query;
275 query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
276 hits = searcher.search(query, null, 100);
277 assertTrue("hits is null and it shouldn't be", hits != null);
// NOTE(review): scoreDocs[0] is read before the totalHits assertion below; with no hits this line
// would presumably throw instead of producing the assertion message - confirm.
278 ScoreDoc doc = hits.scoreDocs[0];
279 // System.out.println("Doc: " + doc.toString());
280 // System.out.println("Explain: " + searcher.explain(query, doc.doc));
281 assertTrue("there should only be one hit", hits.totalHits == 1);
282 // should have score = 3 because adjacent terms have payloads of 2,4
283 assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
// Nests PayloadNearQuery instances two levels deep, mixing ordered and unordered matching,
// and expects a single hit whose score is 3.
// NOTE(review): the declaration of 'hits' is on a line not visible in this listing.
286 public void testComplexNested() throws IOException {
287 PayloadNearQuery query;
290 // combine ordered and unordered spans with some nesting to make sure all payloads are counted
// q1 and q2 are ordered phrases; q3 and q4 are unordered word pairs.
292 SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
293 SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
294 SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
295 SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
// First clause: ordered near of q1,q2. Second clause: unordered near of q3,q4. Both use slop 0.
296 SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
// The outer query combines the two clauses unordered with slop 0.
297 query = new PayloadNearQuery(clauses, 0, false);
298 hits = searcher.search(query, null, 100);
299 assertTrue("hits is null and it shouldn't be", hits != null);
300 // should be only 1 hit - doc 999
301 assertTrue("should only be one hit", hits.scoreDocs.length == 1);
302 // the score should be 3 - the average of all the underlying payloads
303 ScoreDoc doc = hits.scoreDocs[0];
304 // System.out.println("Doc: " + doc.toString());
305 // System.out.println("Explain: " + searcher.explain(query, doc.doc));
306 assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
309 // must be static for weight serialization tests
// Similarity used for both indexing and searching in this test. It overrides payload scoring and,
// per the banner comment below, is meant to make the remaining factors 1.
310 static class BoostingSimilarity extends DefaultSimilarity {
// NOTE(review): the body of scorePayload is not visible in this listing.
312 @Override public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
// NOTE(review): payload2/payload4 declared in this file are one byte each, so "size 4" below looks stale - confirm.
313 //we know it is size 4 here, so ignore the offset/length
316 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
317 //Make everything else 1 so we see the effect of the payload
318 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// The norm is whatever boost the field invert state reports.
319 @Override public float computeNorm(String fieldName, FieldInvertState state) {
320 return state.getBoost();
// NOTE(review): the bodies of queryNorm, sloppyFreq, coord and tf are not visible here;
// per the banner above they presumably return 1 - confirm.
323 @Override public float queryNorm(float sumOfSquaredWeights) {
327 @Override public float sloppyFreq(int distance) {
331 @Override public float coord(int overlap, int maxOverlap) {
334 @Override public float tf(float freq) {
337 // idf used for phrase queries
// Returns an anonymous IDFExplanation; the 'terms' and 'searcher' arguments are not used in the visible lines.
338 @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
339 return new IDFExplanation() {
// NOTE(review): the value returned by getIdf is not visible in this listing.
341 public float getIdf() {
// Fixed placeholder text for the idf explanation.
345 public String explain() {
346 return "Inexplicable";