1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.io.IOException;
23 import java.io.StringReader;
24 import java.util.Collection;
25 import java.util.Collections;
26 import org.apache.lucene.analysis.Analyzer;
27 import org.apache.lucene.analysis.StopFilter;
28 import org.apache.lucene.analysis.TokenStream;
29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
30 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
31 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
32 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
33 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
34 import org.apache.lucene.analysis.CharArraySet;
35 import org.apache.lucene.document.Document;
36 import org.apache.lucene.document.Field;
37 import org.apache.lucene.index.IndexReader;
38 import org.apache.lucene.index.RandomIndexWriter;
39 import org.apache.lucene.index.Term;
40 import org.apache.lucene.index.TermPositions;
41 import org.apache.lucene.queryParser.QueryParser;
42 import org.apache.lucene.store.Directory;
43 import org.apache.lucene.analysis.LowerCaseTokenizer;
44 import org.apache.lucene.analysis.TokenFilter;
45 import org.apache.lucene.index.Payload;
46 import org.apache.lucene.search.payloads.PayloadSpanUtil;
47 import org.apache.lucene.search.spans.SpanNearQuery;
48 import org.apache.lucene.search.spans.SpanQuery;
49 import org.apache.lucene.search.spans.SpanTermQuery;
50 import org.apache.lucene.search.spans.Spans;
51 import org.apache.lucene.util.Version;
52 import org.apache.lucene.util.LuceneTestCase;
55 * Term position unit test.
58 * @version $Revision: 1161586 $
60 public class TestPositionIncrement extends LuceneTestCase {
/**
 * Verifies that custom position increments emitted by an analyzer are honored
 * by TermPositions, PhraseQuery (with and without explicit positions),
 * MultiPhraseQuery, and the query parser's enablePositionIncrements flag.
 * The indexed tokens "1".."5" end up at positions 0, 2, 3, 3, 4
 * (increments {0, 2, 1, 0, 1}), as confirmed by the assertions below.
 */
public void testSetPosition() throws Exception {
// Analyzer that emits a fixed token sequence with hand-picked position
// increments, independent of the actual field text.
Analyzer analyzer = new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new TokenStream() {
// Fixed tokens and the position increment assigned to each one.
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
// Emits TOKENS[i] with increment INCREMENTS[i]; exhausted after 5 tokens.
public boolean incrementToken() {
if (i == TOKENS.length)
termAtt.append(TOKENS[i]);
offsetAtt.setOffset(i,i);
posIncrAtt.setPositionIncrement(INCREMENTS[i]);
public void reset() throws IOException {
// Index a single document; the field text is ignored by the analyzer above.
Directory store = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
Document d = new Document();
d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(d);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
// first token should be at position 0
assertEquals(0, pos.nextPosition());
pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
// second token should be at position 2
assertEquals(2, pos.nextPosition());
// "1" and "2" are not adjacent (positions 0 and 2), so the phrase fails.
q = new PhraseQuery();
q.add(new Term("field", "1"));
q.add(new Term("field", "2"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// same as previous, just specify positions explicitly.
q = new PhraseQuery();
q.add(new Term("field", "1"),0);
q.add(new Term("field", "2"),1);
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// specifying correct positions should find the phrase.
q = new PhraseQuery();
q.add(new Term("field", "1"),0);
q.add(new Term("field", "2"),2);
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "2" (pos 2) and "3" (pos 3) are adjacent: phrase matches.
q = new PhraseQuery();
q.add(new Term("field", "2"));
q.add(new Term("field", "3"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "3" and "4" share position 3, so they are not in phrase order.
q = new PhraseQuery();
q.add(new Term("field", "3"));
q.add(new Term("field", "4"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// phrase query would find it when correct positions are specified.
q = new PhraseQuery();
q.add(new Term("field", "3"),0);
q.add(new Term("field", "4"),0);
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// phrase query should fail for non existing searched term
// even if there exist another searched terms in the same searched position.
q = new PhraseQuery();
q.add(new Term("field", "3"),0);
q.add(new Term("field", "9"),0);
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// multi-phrase query should succeed for non existing searched term
// because there exist another searched terms in the same searched position.
MultiPhraseQuery mq = new MultiPhraseQuery();
mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
hits = searcher.search(mq, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "2" (pos 2) followed by "4" (pos 3): adjacent, matches.
q = new PhraseQuery();
q.add(new Term("field", "2"));
q.add(new Term("field", "4"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "3" (pos 3) followed by "5" (pos 4): adjacent, matches.
q = new PhraseQuery();
q.add(new Term("field", "3"));
q.add(new Term("field", "5"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "4" (pos 3) followed by "5" (pos 4): adjacent, matches.
q = new PhraseQuery();
q.add(new Term("field", "4"));
q.add(new Term("field", "5"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
// "2" (pos 2) and "5" (pos 4) have a gap: exact phrase fails.
q = new PhraseQuery();
q.add(new Term("field", "2"));
q.add(new Term("field", "5"));
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// should not find "1 2" because there is a gap of 1 in the index
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
new StopWhitespaceAnalyzer(false));
q = (PhraseQuery) qp.parse("\"1 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// omitted stop word cannot help because stop filter swallows the increments.
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// query parser alone won't help, because stop filter swallows the increments.
qp.setEnablePositionIncrements(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// stop filter alone won't help, because query parser swallows the increments.
qp.setEnablePositionIncrements(false);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(0, hits.length);
// when both qp and stopFilter propagate increments, we should find the doc.
qp = new QueryParser(TEST_VERSION_CURRENT, "field",
new StopWhitespaceAnalyzer(true));
qp.setEnablePositionIncrements(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
assertEquals(1, hits.length);
/**
 * Whitespace analyzer wrapped in a StopFilter that removes the token "stop".
 * Whether the filter preserves position increments across removed tokens is
 * selected via the StopFilter's matchVersion argument: TEST_VERSION_CURRENT
 * enables increment propagation, Version.LUCENE_24 disables it (pre-2.9
 * behavior).
 */
private static class StopWhitespaceAnalyzer extends Analyzer {
// When true, gaps left by removed stop words survive as position increments.
boolean enablePositionIncrements;
final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
this.enablePositionIncrements = enablePositionIncrements;
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = a.tokenStream(fieldName,reader);
// matchVersion drives the increment behavior; the stop set contains only "stop".
return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
/**
 * Verifies that spans and payloads behave correctly when a token occurs at
 * position 0: term positions for "a", payload counts from getSpans/getPayload,
 * and PayloadSpanUtil must all account for the position-0 occurrence
 * (tracked via sawZero).
 */
public void testPayloadsPos0() throws Exception {
Directory dir = newDirectory();
// TestPayloadAnalyzer attaches a "pos: N" payload to every token.
RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
Document doc = new Document();
doc.add(new Field("content",
new StringReader("a a b c d e a f g h i j a b k k")));
writer.addDocument(doc);
IndexReader r = writer.getReader();
TermPositions tp = r.termPositions(new Term("content", "a"));
assertTrue(tp.next());
// "a" occurs 4 times
assertEquals(4, tp.freq());
// first occurrence position, then 1, 3, 6 for the remaining occurrences
assertEquals(expected, tp.nextPosition());
assertEquals(1, tp.nextPosition());
assertEquals(3, tp.nextPosition());
assertEquals(6, tp.nextPosition());
// only one doc has "a"
assertFalse(tp.next());
IndexSearcher is = newSearcher(r);
// Span query: "a" near "k" within 30 positions, unordered not required.
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
SpanQuery[] sqs = { stq1, stq2 };
SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
boolean sawZero = false;
//System.out.println("\ngetPayloadSpans test");
// Count payloads across all matching spans; expects one span starting at 0.
Spans pspans = snq.getSpans(is.getIndexReader());
while (pspans.next()) {
//System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
Collection<byte[]> payloads = pspans.getPayload();
sawZero |= pspans.start() == 0;
count += payloads.size();
assertEquals(5, count);
//System.out.println("\ngetSpans test");
// Same query via plain getSpans: count the spans themselves.
Spans spans = snq.getSpans(is.getIndexReader());
while (spans.next()) {
sawZero |= spans.start() == 0;
//System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
assertEquals(4, count);
//System.out.println("\nPayloadSpanUtil test");
// PayloadSpanUtil must also surface the payload of the position-0 token.
PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
for (byte[] bytes : pls) {
String s = new String(bytes);
//System.out.println(s);
sawZero |= s.equals("pos: 0");
assertEquals(5, count);
is.getIndexReader().close();
/**
 * Analyzer used by testPayloadsPos0: tokenizes with LowerCaseTokenizer and
 * wraps the stream in PayloadFilter, which attaches a position payload to
 * each token.
 */
final class TestPayloadAnalyzer extends Analyzer {
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
return new PayloadFilter(result, fieldName);
/**
 * TokenFilter that stores a "pos: N" payload on every token, where N is the
 * token's accumulated position, so tests can recover token positions from
 * payloads.
 */
final class PayloadFilter extends TokenFilter {
final PositionIncrementAttribute posIncrAttr;
final PayloadAttribute payloadAttr;
final CharTermAttribute termAttr;
public PayloadFilter(TokenStream input, String fieldName) {
this.fieldName = fieldName;
posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
payloadAttr = input.addAttribute(PayloadAttribute.class);
termAttr = input.addAttribute(CharTermAttribute.class);
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
// Record the current position as the token's payload.
// NOTE(review): getBytes() uses the platform charset — presumably fine for
// the ASCII "pos: N" strings this test produces.
payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
posIncrAttr.setPositionIncrement(posIncr);
if (TestPositionIncrement.VERBOSE) {
System.out.println("term=" + termAttr + " pos=" + pos);