+++ /dev/null
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Collection;
-import java.util.Collections;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.search.payloads.PayloadSpanUtil;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.LuceneTestCase;
-
-/**
- * Term position unit test.
- *
- * <p>Checks that position increments emitted by an analyzer end up in the
- * indexed term positions, and that phrase, multi-phrase and span queries
- * (including payload retrieval) behave accordingly.
- *
- * @version $Revision: 1161586 $
- */
-public class TestPositionIncrement extends LuceneTestCase {
-
-  /** Maximum number of hits requested from every search in this test. */
-  private static final int MAX_HITS = 1000;
-
-  /**
-   * Runs {@code query} against {@code searcher} and asserts the number of hits.
-   *
-   * @param expected expected number of matching documents
-   * @param searcher searcher to run the query on
-   * @param query    query to execute
-   * @throws IOException if the search fails
-   */
-  private void assertHitCount(int expected, IndexSearcher searcher, Query query) throws IOException {
-    ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;
-    assertEquals(expected, hits.length);
-  }
-
-  /**
-   * Indexes one document through an analyzer that ignores its input and always
-   * emits the tokens "1".."5" with position increments {0, 2, 1, 0, 1}, then
-   * verifies the stored positions and the outcome of a series of phrase queries
-   * built by hand and through the query parser.
-   */
-  public void testSetPosition() throws Exception {
-    Analyzer analyzer = new Analyzer() {
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new TokenStream() {
-          private final String[] TOKENS = {"1", "2", "3", "4", "5"};
-          private final int[] INCREMENTS = {0, 2, 1, 0, 1};
-          private int i = 0;
-
-          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-          @Override
-          public boolean incrementToken() {
-            if (i == TOKENS.length) {
-              return false;
-            }
-            clearAttributes();
-            termAtt.append(TOKENS[i]);
-            offsetAtt.setOffset(i, i);
-            posIncrAtt.setPositionIncrement(INCREMENTS[i]);
-            i++;
-            return true;
-          }
-
-          @Override
-          public void reset() throws IOException {
-            super.reset();
-            this.i = 0;
-          }
-        };
-      }
-    };
-    Directory store = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
-    Document d = new Document();
-    // The field text is irrelevant: the analyzer above never reads it.
-    d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
-    writer.addDocument(d);
-    IndexReader reader = writer.getReader();
-    writer.close();
-
-    IndexSearcher searcher = newSearcher(reader);
-
-    TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
-    pos.next();
-    // first token should be at position 0
-    assertEquals(0, pos.nextPosition());
-
-    pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
-    pos.next();
-    // second token should be at position 2
-    assertEquals(2, pos.nextPosition());
-
-    PhraseQuery q;
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "1"));
-    q.add(new Term("field", "2"));
-    assertHitCount(0, searcher, q);
-
-    // same as previous, just specify positions explicitly.
-    q = new PhraseQuery();
-    q.add(new Term("field", "1"), 0);
-    q.add(new Term("field", "2"), 1);
-    assertHitCount(0, searcher, q);
-
-    // specifying correct positions should find the phrase.
-    q = new PhraseQuery();
-    q.add(new Term("field", "1"), 0);
-    q.add(new Term("field", "2"), 2);
-    assertHitCount(1, searcher, q);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "3"));
-    assertHitCount(1, searcher, q);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"));
-    q.add(new Term("field", "4"));
-    assertHitCount(0, searcher, q);
-
-    // phrase query would find it when correct positions are specified.
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"), 0);
-    q.add(new Term("field", "4"), 0);
-    assertHitCount(1, searcher, q);
-
-    // phrase query should fail for a non-existing searched term
-    // even if another searched term exists at the same searched position.
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"), 0);
-    q.add(new Term("field", "9"), 0);
-    assertHitCount(0, searcher, q);
-
-    // multi-phrase query should succeed for a non-existing searched term
-    // because another searched term exists at the same searched position.
-    MultiPhraseQuery mq = new MultiPhraseQuery();
-    mq.add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
-    assertHitCount(1, searcher, mq);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "4"));
-    assertHitCount(1, searcher, q);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"));
-    q.add(new Term("field", "5"));
-    assertHitCount(1, searcher, q);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "4"));
-    q.add(new Term("field", "5"));
-    assertHitCount(1, searcher, q);
-
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "5"));
-    assertHitCount(0, searcher, q);
-
-    // should not find "1 2" because there is a gap of 1 in the index
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-        new StopWhitespaceAnalyzer(false));
-    // The casts below double as a check that the parser produced a PhraseQuery.
-    q = (PhraseQuery) qp.parse("\"1 2\"");
-    assertHitCount(0, searcher, q);
-
-    // omitted stop word cannot help because stop filter swallows the increments.
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    assertHitCount(0, searcher, q);
-
-    // query parser alone won't help, because stop filter swallows the increments.
-    qp.setEnablePositionIncrements(true);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    assertHitCount(0, searcher, q);
-
-    // stop filter alone won't help, because query parser swallows the increments.
-    // A parser over the increment-preserving analyzer is needed here; otherwise
-    // this case would merely repeat the "stop filter swallows" scenario above.
-    qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-        new StopWhitespaceAnalyzer(true));
-    qp.setEnablePositionIncrements(false);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    assertHitCount(0, searcher, q);
-
-    // when both qp and stopFilter propagate increments, we should find the doc.
-    qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-        new StopWhitespaceAnalyzer(true));
-    qp.setEnablePositionIncrements(true);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    assertHitCount(1, searcher, q);
-
-    searcher.close();
-    reader.close();
-    store.close();
-  }
-
-  /**
-   * Whitespace analyzer followed by a stop filter that removes the single word
-   * "stop" (case-insensitively).
-   *
-   * <p>The constructor flag selects the match version handed to the stop
-   * filter: the current test version when {@code true}, {@code LUCENE_24} when
-   * {@code false}. The test above relies on the older version dropping the
-   * position gap left by a removed stop word.
-   */
-  private static class StopWhitespaceAnalyzer extends Analyzer {
-    boolean enablePositionIncrements;
-    final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
-
-    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
-      this.enablePositionIncrements = enablePositionIncrements;
-    }
-
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = a.tokenStream(fieldName, reader);
-      Version stopFilterVersion = enablePositionIncrements ? TEST_VERSION_CURRENT : Version.LUCENE_24;
-      return new StopFilter(stopFilterVersion, ts,
-          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
-    }
-  }
-
-  /**
-   * Indexes a document whose first token has position increment 0 (see
-   * {@code PayloadFilter}) and verifies that term positions, span enumeration
-   * and payload retrieval all see the occurrence at position 0.
-   */
-  public void testPayloadsPos0() throws Exception {
-    Directory dir = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
-    Document doc = new Document();
-    doc.add(new Field("content",
-        new StringReader("a a b c d e a f g h i j a b k k")));
-    writer.addDocument(doc);
-
-    IndexReader r = writer.getReader();
-
-    TermPositions tp = r.termPositions(new Term("content", "a"));
-    assertTrue(tp.next());
-    // "a" occurs 4 times
-    assertEquals(4, tp.freq());
-    assertEquals(0, tp.nextPosition());
-    assertEquals(1, tp.nextPosition());
-    assertEquals(3, tp.nextPosition());
-    assertEquals(6, tp.nextPosition());
-
-    // only one doc has "a"
-    assertFalse(tp.next());
-
-    IndexSearcher is = newSearcher(r);
-
-    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
-    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
-    SpanQuery[] sqs = { stq1, stq2 };
-    SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
-
-    // Payloads collected while iterating the spans.
-    int count = 0;
-    boolean sawZero = false;
-    Spans pspans = snq.getSpans(is.getIndexReader());
-    while (pspans.next()) {
-      Collection<byte[]> payloads = pspans.getPayload();
-      sawZero |= pspans.start() == 0;
-      count += payloads.size();
-    }
-    assertEquals(5, count);
-    assertTrue(sawZero);
-
-    // Plain span enumeration, counting the spans themselves.
-    Spans spans = snq.getSpans(is.getIndexReader());
-    count = 0;
-    sawZero = false;
-    while (spans.next()) {
-      count++;
-      sawZero |= spans.start() == 0;
-    }
-    assertEquals(4, count);
-    assertTrue(sawZero);
-
-    // Payloads collected through PayloadSpanUtil.
-    sawZero = false;
-    PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
-    Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
-    count = pls.size();
-    for (byte[] bytes : pls) {
-      String s = new String(bytes);
-      sawZero |= s.equals("pos: 0");
-    }
-    assertEquals(5, count);
-    assertTrue(sawZero);
-    writer.close();
-    // Close the searcher as testSetPosition does; the reader is closed separately.
-    is.close();
-    is.getIndexReader().close();
-    dir.close();
-  }
-}
-
-/**
- * Analyzer used by the payload tests: tokenizes the input with a
- * {@link LowerCaseTokenizer} and passes the tokens through a
- * {@link PayloadFilter} created for the requested field.
- */
-final class TestPayloadAnalyzer extends Analyzer {
-
-  @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new PayloadFilter(
-        new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader),
-        fieldName);
-  }
-}
-
-/**
- * Test filter that overrides the position increment of every token and attaches
- * a payload to it.
- *
- * <p>Tokens are counted from zero: even-numbered tokens get a position
- * increment of 0, odd-numbered tokens an increment of 1. Each token's payload
- * is the bytes of {@code "pos: " + pos}, where {@code pos} is the running sum
- * of the increments handed out <em>before</em> the current token.
- */
-final class PayloadFilter extends TokenFilter {
-  String fieldName;
-
-  /** Running sum of the position increments assigned so far. */
-  int pos;
-
-  /** Zero-based index of the next token to be emitted. */
-  int i;
-
-  final PositionIncrementAttribute posIncrAttr;
-  final PayloadAttribute payloadAttr;
-  final CharTermAttribute termAttr;
-
-  /**
-   * @param input     token stream to decorate
-   * @param fieldName name of the field being analyzed (stored, not otherwise used here)
-   */
-  public PayloadFilter(TokenStream input, String fieldName) {
-    super(input);
-    this.fieldName = fieldName;
-    pos = 0;
-    i = 0;
-    posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
-    payloadAttr = input.addAttribute(PayloadAttribute.class);
-    termAttr = input.addAttribute(CharTermAttribute.class);
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-    if (!input.incrementToken()) {
-      return false;
-    }
-    // The payload is built before pos is advanced, so it carries the position
-    // accumulated up to the previous token.
-    payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
-    int posIncr = (i % 2 == 1) ? 1 : 0;
-    posIncrAttr.setPositionIncrement(posIncr);
-    pos += posIncr;
-    if (TestPositionIncrement.VERBOSE) {
-      System.out.println("term=" + termAttr + " pos=" + pos);
-    }
-    i++;
-    return true;
-  }
-
-  /**
-   * Resets the wrapped stream and this filter's counters, so that a reused
-   * filter assigns the same increments and payloads as a fresh one.
-   */
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    pos = 0;
-    i = 0;
-  }
-}