1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Reader;
21 import java.io.IOException;
22 import java.io.StringReader;
23 import java.util.Collection;
24 import java.util.Collections;
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.analysis.StopFilter;
27 import org.apache.lucene.analysis.TokenStream;
28 import org.apache.lucene.analysis.WhitespaceAnalyzer;
29 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
30 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
31 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
32 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
33 import org.apache.lucene.analysis.CharArraySet;
34 import org.apache.lucene.document.Document;
35 import org.apache.lucene.document.Field;
36 import org.apache.lucene.index.IndexReader;
37 import org.apache.lucene.index.RandomIndexWriter;
38 import org.apache.lucene.index.Term;
39 import org.apache.lucene.index.TermPositions;
40 import org.apache.lucene.queryParser.QueryParser;
41 import org.apache.lucene.store.Directory;
42 import org.apache.lucene.analysis.LowerCaseTokenizer;
43 import org.apache.lucene.analysis.TokenFilter;
44 import org.apache.lucene.index.Payload;
45 import org.apache.lucene.search.payloads.PayloadSpanUtil;
46 import org.apache.lucene.search.spans.SpanNearQuery;
47 import org.apache.lucene.search.spans.SpanQuery;
48 import org.apache.lucene.search.spans.SpanTermQuery;
49 import org.apache.lucene.search.spans.Spans;
50 import org.apache.lucene.util.Version;
51 import org.apache.lucene.util.LuceneTestCase;
/**
 * Term position unit test.
 *
 * @version $Revision: 1066722 $
 */
59 public class TestPositionIncrement extends LuceneTestCase {
61 public void testSetPosition() throws Exception {
62 Analyzer analyzer = new Analyzer() {
64 public TokenStream tokenStream(String fieldName, Reader reader) {
65 return new TokenStream() {
66 private final String[] TOKENS = {"1", "2", "3", "4", "5"};
67 private final int[] INCREMENTS = {0, 2, 1, 0, 1};
70 PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
71 CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
72 OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
75 public boolean incrementToken() {
76 if (i == TOKENS.length)
79 termAtt.append(TOKENS[i]);
80 offsetAtt.setOffset(i,i);
81 posIncrAtt.setPositionIncrement(INCREMENTS[i]);
88 Directory store = newDirectory();
89 RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
90 Document d = new Document();
91 d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
92 writer.addDocument(d);
93 IndexReader reader = writer.getReader();
97 IndexSearcher searcher = newSearcher(reader);
99 TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
101 // first token should be at position 0
102 assertEquals(0, pos.nextPosition());
104 pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
106 // second token should be at position 2
107 assertEquals(2, pos.nextPosition());
112 q = new PhraseQuery();
113 q.add(new Term("field", "1"));
114 q.add(new Term("field", "2"));
115 hits = searcher.search(q, null, 1000).scoreDocs;
116 assertEquals(0, hits.length);
118 // same as previous, just specify positions explicitely.
119 q = new PhraseQuery();
120 q.add(new Term("field", "1"),0);
121 q.add(new Term("field", "2"),1);
122 hits = searcher.search(q, null, 1000).scoreDocs;
123 assertEquals(0, hits.length);
125 // specifying correct positions should find the phrase.
126 q = new PhraseQuery();
127 q.add(new Term("field", "1"),0);
128 q.add(new Term("field", "2"),2);
129 hits = searcher.search(q, null, 1000).scoreDocs;
130 assertEquals(1, hits.length);
132 q = new PhraseQuery();
133 q.add(new Term("field", "2"));
134 q.add(new Term("field", "3"));
135 hits = searcher.search(q, null, 1000).scoreDocs;
136 assertEquals(1, hits.length);
138 q = new PhraseQuery();
139 q.add(new Term("field", "3"));
140 q.add(new Term("field", "4"));
141 hits = searcher.search(q, null, 1000).scoreDocs;
142 assertEquals(0, hits.length);
144 // phrase query would find it when correct positions are specified.
145 q = new PhraseQuery();
146 q.add(new Term("field", "3"),0);
147 q.add(new Term("field", "4"),0);
148 hits = searcher.search(q, null, 1000).scoreDocs;
149 assertEquals(1, hits.length);
151 // phrase query should fail for non existing searched term
152 // even if there exist another searched terms in the same searched position.
153 q = new PhraseQuery();
154 q.add(new Term("field", "3"),0);
155 q.add(new Term("field", "9"),0);
156 hits = searcher.search(q, null, 1000).scoreDocs;
157 assertEquals(0, hits.length);
159 // multi-phrase query should succed for non existing searched term
160 // because there exist another searched terms in the same searched position.
161 MultiPhraseQuery mq = new MultiPhraseQuery();
162 mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
163 hits = searcher.search(mq, null, 1000).scoreDocs;
164 assertEquals(1, hits.length);
166 q = new PhraseQuery();
167 q.add(new Term("field", "2"));
168 q.add(new Term("field", "4"));
169 hits = searcher.search(q, null, 1000).scoreDocs;
170 assertEquals(1, hits.length);
172 q = new PhraseQuery();
173 q.add(new Term("field", "3"));
174 q.add(new Term("field", "5"));
175 hits = searcher.search(q, null, 1000).scoreDocs;
176 assertEquals(1, hits.length);
178 q = new PhraseQuery();
179 q.add(new Term("field", "4"));
180 q.add(new Term("field", "5"));
181 hits = searcher.search(q, null, 1000).scoreDocs;
182 assertEquals(1, hits.length);
184 q = new PhraseQuery();
185 q.add(new Term("field", "2"));
186 q.add(new Term("field", "5"));
187 hits = searcher.search(q, null, 1000).scoreDocs;
188 assertEquals(0, hits.length);
190 // should not find "1 2" because there is a gap of 1 in the index
191 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
192 new StopWhitespaceAnalyzer(false));
193 q = (PhraseQuery) qp.parse("\"1 2\"");
194 hits = searcher.search(q, null, 1000).scoreDocs;
195 assertEquals(0, hits.length);
197 // omitted stop word cannot help because stop filter swallows the increments.
198 q = (PhraseQuery) qp.parse("\"1 stop 2\"");
199 hits = searcher.search(q, null, 1000).scoreDocs;
200 assertEquals(0, hits.length);
202 // query parser alone won't help, because stop filter swallows the increments.
203 qp.setEnablePositionIncrements(true);
204 q = (PhraseQuery) qp.parse("\"1 stop 2\"");
205 hits = searcher.search(q, null, 1000).scoreDocs;
206 assertEquals(0, hits.length);
208 // stop filter alone won't help, because query parser swallows the increments.
209 qp.setEnablePositionIncrements(false);
210 q = (PhraseQuery) qp.parse("\"1 stop 2\"");
211 hits = searcher.search(q, null, 1000).scoreDocs;
212 assertEquals(0, hits.length);
214 // when both qp qnd stopFilter propagate increments, we should find the doc.
215 qp = new QueryParser(TEST_VERSION_CURRENT, "field",
216 new StopWhitespaceAnalyzer(true));
217 qp.setEnablePositionIncrements(true);
218 q = (PhraseQuery) qp.parse("\"1 stop 2\"");
219 hits = searcher.search(q, null, 1000).scoreDocs;
220 assertEquals(1, hits.length);
227 private static class StopWhitespaceAnalyzer extends Analyzer {
228 boolean enablePositionIncrements;
229 final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
230 public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
231 this.enablePositionIncrements = enablePositionIncrements;
234 public TokenStream tokenStream(String fieldName, Reader reader) {
235 TokenStream ts = a.tokenStream(fieldName,reader);
236 return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
237 new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
241 public void testPayloadsPos0() throws Exception {
242 Directory dir = newDirectory();
243 RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
244 Document doc = new Document();
245 doc.add(new Field("content",
246 new StringReader("a a b c d e a f g h i j a b k k")));
247 writer.addDocument(doc);
249 IndexReader r = writer.getReader();
251 TermPositions tp = r.termPositions(new Term("content", "a"));
253 assertTrue(tp.next());
254 // "a" occurs 4 times
255 assertEquals(4, tp.freq());
257 assertEquals(expected, tp.nextPosition());
258 assertEquals(1, tp.nextPosition());
259 assertEquals(3, tp.nextPosition());
260 assertEquals(6, tp.nextPosition());
262 // only one doc has "a"
263 assertFalse(tp.next());
265 IndexSearcher is = newSearcher(r);
267 SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
268 SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
269 SpanQuery[] sqs = { stq1, stq2 };
270 SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
273 boolean sawZero = false;
274 //System.out.println("\ngetPayloadSpans test");
275 Spans pspans = snq.getSpans(is.getIndexReader());
276 while (pspans.next()) {
277 //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
278 Collection<byte[]> payloads = pspans.getPayload();
279 sawZero |= pspans.start() == 0;
280 count += payloads.size();
282 assertEquals(5, count);
285 //System.out.println("\ngetSpans test");
286 Spans spans = snq.getSpans(is.getIndexReader());
289 while (spans.next()) {
291 sawZero |= spans.start() == 0;
292 //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
294 assertEquals(4, count);
297 //System.out.println("\nPayloadSpanUtil test");
300 PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
301 Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
303 for (byte[] bytes : pls) {
304 String s = new String(bytes);
305 //System.out.println(s);
306 sawZero |= s.equals("pos: 0");
308 assertEquals(5, count);
311 is.getIndexReader().close();
316 final class TestPayloadAnalyzer extends Analyzer {
319 public TokenStream tokenStream(String fieldName, Reader reader) {
320 TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
321 return new PayloadFilter(result, fieldName);
325 final class PayloadFilter extends TokenFilter {
332 final PositionIncrementAttribute posIncrAttr;
333 final PayloadAttribute payloadAttr;
334 final CharTermAttribute termAttr;
336 public PayloadFilter(TokenStream input, String fieldName) {
338 this.fieldName = fieldName;
341 posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
342 payloadAttr = input.addAttribute(PayloadAttribute.class);
343 termAttr = input.addAttribute(CharTermAttribute.class);
347 public boolean incrementToken() throws IOException {
348 if (input.incrementToken()) {
349 payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
356 posIncrAttr.setPositionIncrement(posIncr);
358 if (TestPositionIncrement.VERBOSE) {
359 System.out.println("term=" + termAttr + " pos=" + pos);