--- /dev/null
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Collection;
+import java.util.Collections;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.search.payloads.PayloadSpanUtil;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Unit tests for term position increments: how the increments emitted by an
+ * analyzer end up as indexed positions, and how phrase, multi-phrase, parsed
+ * and span queries behave against those positions.
+ *
+ * @version $Revision: 1161586 $
+ */
+public class TestPositionIncrement extends LuceneTestCase {
+
+  /** Maximum number of hits requested from every search issued by these tests. */
+  private static final int MAX_HITS = 1000;
+
+  /** Name of the single field indexed and queried by {@link #testSetPosition()}. */
+  private static final String FIELD = "field";
+
+  /**
+   * Indexes one document through an analyzer that ignores its input and always
+   * emits the tokens "1".."5" with position increments 0, 2, 1, 0, 1, then
+   * checks the indexed positions and the hit counts of a series of phrase,
+   * multi-phrase and query-parser generated queries.
+   */
+  public void testSetPosition() throws Exception {
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        return new TokenStream() {
+          private final String[] TOKENS = {"1", "2", "3", "4", "5"};
+          private final int[] INCREMENTS = {0, 2, 1, 0, 1};
+          private int i = 0;
+
+          private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+          private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+          private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+          @Override
+          public boolean incrementToken() {
+            if (i == TOKENS.length) {
+              return false;
+            }
+            clearAttributes();
+            termAtt.append(TOKENS[i]);
+            offsetAtt.setOffset(i, i);
+            posIncrAtt.setPositionIncrement(INCREMENTS[i]);
+            i++;
+            return true;
+          }
+
+          @Override
+          public void reset() throws IOException {
+            super.reset();
+            this.i = 0;
+          }
+        };
+      }
+    };
+    Directory store = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
+    Document d = new Document();
+    // the field value is irrelevant: the analyzer above never reads it
+    d.add(newField(FIELD, "bogus", Field.Store.YES, Field.Index.ANALYZED));
+    writer.addDocument(d);
+    IndexReader reader = writer.getReader();
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(reader);
+
+    // first token should be at position 0
+    assertEquals(0, firstPosition(searcher.getIndexReader(), new Term(FIELD, "1")));
+    // second token should be at position 2
+    assertEquals(2, firstPosition(searcher.getIndexReader(), new Term(FIELD, "2")));
+
+    assertNumHits(0, searcher, phrase("1", "2"));
+
+    // same as previous, just specify positions explicitly.
+    assertNumHits(0, searcher, phraseAt("1", 0, "2", 1));
+
+    // specifying correct positions should find the phrase.
+    assertNumHits(1, searcher, phraseAt("1", 0, "2", 2));
+
+    assertNumHits(1, searcher, phrase("2", "3"));
+
+    assertNumHits(0, searcher, phrase("3", "4"));
+
+    // phrase query would find it when correct positions are specified.
+    assertNumHits(1, searcher, phraseAt("3", 0, "4", 0));
+
+    // phrase query should fail for a non-existing searched term
+    // even if another searched term exists at the same searched position.
+    assertNumHits(0, searcher, phraseAt("3", 0, "9", 0));
+
+    // multi-phrase query should succeed for a non-existing searched term
+    // because another searched term exists at the same searched position.
+    MultiPhraseQuery mq = new MultiPhraseQuery();
+    mq.add(new Term[] {new Term(FIELD, "3"), new Term(FIELD, "9")}, 0);
+    assertNumHits(1, searcher, mq);
+
+    assertNumHits(1, searcher, phrase("2", "4"));
+
+    assertNumHits(1, searcher, phrase("3", "5"));
+
+    assertNumHits(1, searcher, phrase("4", "5"));
+
+    assertNumHits(0, searcher, phrase("2", "5"));
+
+    // should not find "1 2" because there is a gap of 1 in the index
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD,
+        new StopWhitespaceAnalyzer(false));
+    // the casts double as a check that the parser really produced a PhraseQuery
+    assertNumHits(0, searcher, (PhraseQuery) qp.parse("\"1 2\""));
+
+    // omitted stop word cannot help because stop filter swallows the increments.
+    assertNumHits(0, searcher, (PhraseQuery) qp.parse("\"1 stop 2\""));
+
+    // query parser alone won't help, because stop filter swallows the increments.
+    qp.setEnablePositionIncrements(true);
+    assertNumHits(0, searcher, (PhraseQuery) qp.parse("\"1 stop 2\""));
+
+    // stop filter alone won't help, because query parser swallows the increments.
+    qp.setEnablePositionIncrements(false);
+    assertNumHits(0, searcher, (PhraseQuery) qp.parse("\"1 stop 2\""));
+
+    // when both qp and stopFilter propagate increments, we should find the doc.
+    qp = new QueryParser(TEST_VERSION_CURRENT, FIELD,
+        new StopWhitespaceAnalyzer(true));
+    qp.setEnablePositionIncrements(true);
+    assertNumHits(1, searcher, (PhraseQuery) qp.parse("\"1 stop 2\""));
+
+    searcher.close();
+    reader.close();
+    store.close();
+  }
+
+  /**
+   * Returns the first position of {@code term} in the first document that
+   * contains it, failing the test if no document does. The enumerator is
+   * always closed before returning.
+   */
+  private int firstPosition(IndexReader reader, Term term) throws IOException {
+    TermPositions positions = reader.termPositions(term);
+    try {
+      assertTrue("no document contains " + term, positions.next());
+      return positions.nextPosition();
+    } finally {
+      positions.close();
+    }
+  }
+
+  /** Asserts that {@code query} matches exactly {@code expected} documents. */
+  private void assertNumHits(int expected, IndexSearcher searcher, Query query) throws IOException {
+    ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;
+    assertEquals(expected, hits.length);
+  }
+
+  /** Builds a two-term phrase on {@link #FIELD}, letting PhraseQuery assign the positions. */
+  private static PhraseQuery phrase(String first, String second) {
+    PhraseQuery q = new PhraseQuery();
+    q.add(new Term(FIELD, first));
+    q.add(new Term(FIELD, second));
+    return q;
+  }
+
+  /** Builds a two-term phrase on {@link #FIELD} with explicitly specified positions. */
+  private static PhraseQuery phraseAt(String first, int firstPos, String second, int secondPos) {
+    PhraseQuery q = new PhraseQuery();
+    q.add(new Term(FIELD, first), firstPos);
+    q.add(new Term(FIELD, second), secondPos);
+    return q;
+  }
+
+  /**
+   * Whitespace tokenization followed by a {@link StopFilter} that removes the
+   * word "stop". The flag selects the match version handed to the filter:
+   * {@code TEST_VERSION_CURRENT} when {@code true}, {@code Version.LUCENE_24}
+   * when {@code false}. {@link #testSetPosition()} relies on that choice to
+   * decide whether the removed stop word leaves a position gap behind.
+   */
+  private static class StopWhitespaceAnalyzer extends Analyzer {
+    private final boolean enablePositionIncrements;
+    private final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+
+    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
+      this.enablePositionIncrements = enablePositionIncrements;
+    }
+
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      TokenStream ts = a.tokenStream(fieldName, reader);
+      Version matchVersion = enablePositionIncrements ? TEST_VERSION_CURRENT : Version.LUCENE_24;
+      return new StopFilter(matchVersion, ts,
+          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
+    }
+  }
+
+  /**
+   * Indexes one document through {@link TestPayloadAnalyzer}, whose very first
+   * token carries a position increment of 0, and verifies that term
+   * positions, span enumeration and payload retrieval all see the token at
+   * position 0.
+   */
+  public void testPayloadsPos0() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
+    Document doc = new Document();
+    doc.add(new Field("content",
+        new StringReader("a a b c d e a f g h i j a b k k")));
+    writer.addDocument(doc);
+
+    IndexReader r = writer.getReader();
+
+    TermPositions tp = r.termPositions(new Term("content", "a"));
+    try {
+      assertTrue(tp.next());
+      // "a" occurs 4 times
+      assertEquals(4, tp.freq());
+      assertEquals(0, tp.nextPosition());
+      assertEquals(1, tp.nextPosition());
+      assertEquals(3, tp.nextPosition());
+      assertEquals(6, tp.nextPosition());
+
+      // only one doc has "a"
+      assertFalse(tp.next());
+    } finally {
+      tp.close();
+    }
+
+    IndexSearcher is = newSearcher(r);
+
+    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+    SpanQuery[] sqs = { stq1, stq2 };
+    SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
+
+    // first pass over the spans: total up the payloads attached to each match
+    int count = 0;
+    boolean sawZero = false;
+    Spans pspans = snq.getSpans(is.getIndexReader());
+    while (pspans.next()) {
+      Collection<byte[]> payloads = pspans.getPayload();
+      sawZero |= pspans.start() == 0;
+      count += payloads.size();
+    }
+    assertEquals(5, count);
+    assertTrue(sawZero);
+
+    // second pass over fresh spans: count the matches themselves
+    Spans spans = snq.getSpans(is.getIndexReader());
+    count = 0;
+    sawZero = false;
+    while (spans.next()) {
+      count++;
+      sawZero |= spans.start() == 0;
+    }
+    assertEquals(4, count);
+    assertTrue(sawZero);
+
+    // finally fetch the payloads through PayloadSpanUtil
+    sawZero = false;
+    PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
+    Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
+    count = pls.size();
+    for (byte[] bytes : pls) {
+      // decoded with the platform default charset, mirroring how PayloadFilter encodes it
+      String s = new String(bytes);
+      sawZero |= s.equals("pos: 0");
+    }
+    assertEquals(5, count);
+    assertTrue(sawZero);
+
+    writer.close();
+    // fetch the reader before closing the searcher, then release both;
+    // testSetPosition closes its searcher as well
+    IndexReader searchedReader = is.getIndexReader();
+    is.close();
+    searchedReader.close();
+    dir.close();
+  }
+}
+
+/**
+ * Analyzer used by the payload tests: lower-case tokenization of the input,
+ * wrapped in a {@link PayloadFilter} created for the requested field.
+ */
+final class TestPayloadAnalyzer extends Analyzer {
+
+  /** Chains a {@link LowerCaseTokenizer} over {@code reader} into a {@link PayloadFilter}. */
+  @Override
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    return new PayloadFilter(
+        new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader),
+        fieldName);
+  }
+}
+
+/**
+ * Token filter that rewrites position increments and attaches a payload to
+ * every token. Tokens are numbered from 0 in the order they are seen:
+ * even-numbered tokens get a position increment of 0, odd-numbered tokens an
+ * increment of 1. Each token's payload is the bytes of {@code "pos: " + pos},
+ * where {@code pos} is the running sum of the increments assigned to the
+ * tokens before it.
+ */
+final class PayloadFilter extends TokenFilter {
+  String fieldName;
+
+  /** Running sum of the position increments assigned so far. */
+  int pos;
+
+  /** Number of tokens already passed through this filter. */
+  int i;
+
+  final PositionIncrementAttribute posIncrAttr;
+  final PayloadAttribute payloadAttr;
+  final CharTermAttribute termAttr;
+
+  /**
+   * @param input the token stream to decorate
+   * @param fieldName name of the field being analyzed; stored but not otherwise used here
+   */
+  public PayloadFilter(TokenStream input, String fieldName) {
+    super(input);
+    this.fieldName = fieldName;
+    pos = 0;
+    i = 0;
+    posIncrAttr = addAttribute(PositionIncrementAttribute.class);
+    payloadAttr = addAttribute(PayloadAttribute.class);
+    termAttr = addAttribute(CharTermAttribute.class);
+  }
+
+  /**
+   * Advances the wrapped stream and, if it produced a token, sets that
+   * token's payload and position increment.
+   *
+   * @return {@code true} if a token is available, {@code false} at end of stream
+   * @throws IOException if the wrapped stream fails
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    // the payload records the position reached BEFORE this token's own increment
+    payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
+    // alternate 0, 1, 0, 1, ... so that the very first token has increment 0
+    int posIncr = (i % 2 == 1) ? 1 : 0;
+    posIncrAttr.setPositionIncrement(posIncr);
+    pos += posIncr;
+    if (TestPositionIncrement.VERBOSE) {
+      System.out.println("term=" + termAttr + " pos=" + pos);
+    }
+    i++;
+    return true;
+  }
+
+  /**
+   * Resets the wrapped stream and this filter's counters, so that a reused
+   * instance numbers tokens from zero again instead of continuing from the
+   * previous stream's state.
+   */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    pos = 0;
+    i = 0;
+  }
+}