lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.Reader;
  22 import java.io.IOException;
  23 import java.io.StringReader;
  24 import java.util.Collection;
  25 import java.util.Collections;
  26 import org.apache.lucene.analysis.Analyzer;
  27 import org.apache.lucene.analysis.StopFilter;
  28 import org.apache.lucene.analysis.TokenStream;
  29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  30 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  31 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  32 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
  33 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  34 import org.apache.lucene.analysis.CharArraySet;
  35 import org.apache.lucene.document.Document;
  36 import org.apache.lucene.document.Field;
  37 import org.apache.lucene.index.IndexReader;
  38 import org.apache.lucene.index.RandomIndexWriter;
  39 import org.apache.lucene.index.Term;
  40 import org.apache.lucene.index.TermPositions;
  41 import org.apache.lucene.queryParser.QueryParser;
  42 import org.apache.lucene.store.Directory;
  43 import org.apache.lucene.analysis.LowerCaseTokenizer;
  44 import org.apache.lucene.analysis.TokenFilter;
  45 import org.apache.lucene.index.Payload;
  46 import org.apache.lucene.search.payloads.PayloadSpanUtil;
  47 import org.apache.lucene.search.spans.SpanNearQuery;
  48 import org.apache.lucene.search.spans.SpanQuery;
  49 import org.apache.lucene.search.spans.SpanTermQuery;
  50 import org.apache.lucene.search.spans.Spans;
  51 import org.apache.lucene.util.Version;
  52 import org.apache.lucene.util.LuceneTestCase;
  53
  54 /**
  55  * Term position unit test.
  56  *
  57  *
  58  * @version $Revision: 1161586 $
  59  */
  60 public class TestPositionIncrement extends LuceneTestCase {
  61
  62   public void testSetPosition() throws Exception {
  63     Analyzer analyzer = new Analyzer() {
  64       @Override
  65       public TokenStream tokenStream(String fieldName, Reader reader) {
  66         return new TokenStream() {
  67           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
  68           private final int[] INCREMENTS = {0, 2, 1, 0, 1};
  69           private int i = 0;
  70
  71           PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  72           CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  73           OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  74
  75           @Override
  76           public boolean incrementToken() {
  77             if (i == TOKENS.length)
  78               return false;
  79             clearAttributes();
  80             termAtt.append(TOKENS[i]);
  81             offsetAtt.setOffset(i,i);
  82             posIncrAtt.setPositionIncrement(INCREMENTS[i]);
  83             i++;
  84             return true;
  85           }
  86
  87           @Override
  88           public void reset() throws IOException {
  89             super.reset();
  90             this.i = 0;
  91           }
  92         };
  93       }
  94     };
  95     Directory store = newDirectory();
  96     RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
  97     Document d = new Document();
  98     d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
  99     writer.addDocument(d);
 100     IndexReader reader = writer.getReader();
 101     writer.close();
 102
 103
 104     IndexSearcher searcher = newSearcher(reader);
 105
 106     TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
 107     pos.next();
 108     // first token should be at position 0
 109     assertEquals(0, pos.nextPosition());
 110
 111     pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
 112     pos.next();
 113     // second token should be at position 2
 114     assertEquals(2, pos.nextPosition());
 115
 116     PhraseQuery q;
 117     ScoreDoc[] hits;
 118
 119     q = new PhraseQuery();
 120     q.add(new Term("field", "1"));
 121     q.add(new Term("field", "2"));
 122     hits = searcher.search(q, null, 1000).scoreDocs;
 123     assertEquals(0, hits.length);
 124
 125     // same as previous, just specify positions explicitely.
 126     q = new PhraseQuery();
 127     q.add(new Term("field", "1"),0);
 128     q.add(new Term("field", "2"),1);
 129     hits = searcher.search(q, null, 1000).scoreDocs;
 130     assertEquals(0, hits.length);
 131
 132     // specifying correct positions should find the phrase.
 133     q = new PhraseQuery();
 134     q.add(new Term("field", "1"),0);
 135     q.add(new Term("field", "2"),2);
 136     hits = searcher.search(q, null, 1000).scoreDocs;
 137     assertEquals(1, hits.length);
 138
 139     q = new PhraseQuery();
 140     q.add(new Term("field", "2"));
 141     q.add(new Term("field", "3"));
 142     hits = searcher.search(q, null, 1000).scoreDocs;
 143     assertEquals(1, hits.length);
 144
 145     q = new PhraseQuery();
 146     q.add(new Term("field", "3"));
 147     q.add(new Term("field", "4"));
 148     hits = searcher.search(q, null, 1000).scoreDocs;
 149     assertEquals(0, hits.length);
 150
 151     // phrase query would find it when correct positions are specified.
 152     q = new PhraseQuery();
 153     q.add(new Term("field", "3"),0);
 154     q.add(new Term("field", "4"),0);
 155     hits = searcher.search(q, null, 1000).scoreDocs;
 156     assertEquals(1, hits.length);
 157
 158     // phrase query should fail for non existing searched term
 159     // even if there exist another searched terms in the same searched position.
 160     q = new PhraseQuery();
 161     q.add(new Term("field", "3"),0);
 162     q.add(new Term("field", "9"),0);
 163     hits = searcher.search(q, null, 1000).scoreDocs;
 164     assertEquals(0, hits.length);
 165
 166     // multi-phrase query should succed for non existing searched term
 167     // because there exist another searched terms in the same searched position.
 168     MultiPhraseQuery mq = new MultiPhraseQuery();
 169     mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
 170     hits = searcher.search(mq, null, 1000).scoreDocs;
 171     assertEquals(1, hits.length);
 172
 173     q = new PhraseQuery();
 174     q.add(new Term("field", "2"));
 175     q.add(new Term("field", "4"));
 176     hits = searcher.search(q, null, 1000).scoreDocs;
 177     assertEquals(1, hits.length);
 178
 179     q = new PhraseQuery();
 180     q.add(new Term("field", "3"));
 181     q.add(new Term("field", "5"));
 182     hits = searcher.search(q, null, 1000).scoreDocs;
 183     assertEquals(1, hits.length);
 184
 185     q = new PhraseQuery();
 186     q.add(new Term("field", "4"));
 187     q.add(new Term("field", "5"));
 188     hits = searcher.search(q, null, 1000).scoreDocs;
 189     assertEquals(1, hits.length);
 190
 191     q = new PhraseQuery();
 192     q.add(new Term("field", "2"));
 193     q.add(new Term("field", "5"));
 194     hits = searcher.search(q, null, 1000).scoreDocs;
 195     assertEquals(0, hits.length);
 196
 197     // should not find "1 2" because there is a gap of 1 in the index
 198     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
 199                                      new StopWhitespaceAnalyzer(false));
 200     q = (PhraseQuery) qp.parse("\"1 2\"");
 201     hits = searcher.search(q, null, 1000).scoreDocs;
 202     assertEquals(0, hits.length);
 203
 204     // omitted stop word cannot help because stop filter swallows the increments.
 205     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
 206     hits = searcher.search(q, null, 1000).scoreDocs;
 207     assertEquals(0, hits.length);
 208
 209     // query parser alone won't help, because stop filter swallows the increments.
 210     qp.setEnablePositionIncrements(true);
 211     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
 212     hits = searcher.search(q, null, 1000).scoreDocs;
 213     assertEquals(0, hits.length);
 214
 215     // stop filter alone won't help, because query parser swallows the increments.
 216     qp.setEnablePositionIncrements(false);
 217     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
 218     hits = searcher.search(q, null, 1000).scoreDocs;
 219     assertEquals(0, hits.length);
 220
 221     // when both qp qnd stopFilter propagate increments, we should find the doc.
 222     qp = new QueryParser(TEST_VERSION_CURRENT, "field",
 223                          new StopWhitespaceAnalyzer(true));
 224     qp.setEnablePositionIncrements(true);
 225     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
 226     hits = searcher.search(q, null, 1000).scoreDocs;
 227     assertEquals(1, hits.length);
 228
 229     searcher.close();
 230     reader.close();
 231     store.close();
 232   }
 233
 234   private static class StopWhitespaceAnalyzer extends Analyzer {
 235     boolean enablePositionIncrements;
 236     final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
 237     public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
 238       this.enablePositionIncrements = enablePositionIncrements;
 239     }
 240     @Override
 241     public TokenStream tokenStream(String fieldName, Reader reader) {
 242       TokenStream ts = a.tokenStream(fieldName,reader);
 243       return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
 244           new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
 245     }
 246   }
 247
 248   public void testPayloadsPos0() throws Exception {
 249     Directory dir = newDirectory();
 250     RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
 251     Document doc = new Document();
 252     doc.add(new Field("content",
 253                       new StringReader("a a b c d e a f g h i j a b k k")));
 254     writer.addDocument(doc);
 255
 256     IndexReader r = writer.getReader();
 257
 258     TermPositions tp = r.termPositions(new Term("content", "a"));
 259     int count = 0;
 260     assertTrue(tp.next());
 261     // "a" occurs 4 times
 262     assertEquals(4, tp.freq());
 263     int expected = 0;
 264     assertEquals(expected, tp.nextPosition());
 265     assertEquals(1, tp.nextPosition());
 266     assertEquals(3, tp.nextPosition());
 267     assertEquals(6, tp.nextPosition());
 268
 269     // only one doc has "a"
 270     assertFalse(tp.next());
 271
 272     IndexSearcher is = newSearcher(r);
 273
 274     SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
 275     SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
 276     SpanQuery[] sqs = { stq1, stq2 };
 277     SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
 278
 279     count = 0;
 280     boolean sawZero = false;
 281     //System.out.println("\ngetPayloadSpans test");
 282     Spans pspans = snq.getSpans(is.getIndexReader());
 283     while (pspans.next()) {
 284       //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
 285       Collection<byte[]> payloads = pspans.getPayload();
 286       sawZero |= pspans.start() == 0;
 287       count += payloads.size();
 288     }
 289     assertEquals(5, count);
 290     assertTrue(sawZero);
 291
 292     //System.out.println("\ngetSpans test");
 293     Spans spans = snq.getSpans(is.getIndexReader());
 294     count = 0;
 295     sawZero = false;
 296     while (spans.next()) {
 297       count++;
 298       sawZero |= spans.start() == 0;
 299       //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
 300     }
 301     assertEquals(4, count);
 302     assertTrue(sawZero);
 303
 304     //System.out.println("\nPayloadSpanUtil test");
 305
 306     sawZero = false;
 307     PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
 308     Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
 309     count = pls.size();
 310     for (byte[] bytes : pls) {
 311       String s = new String(bytes);
 312       //System.out.println(s);
 313       sawZero |= s.equals("pos: 0");
 314     }
 315     assertEquals(5, count);
 316     assertTrue(sawZero);
 317     writer.close();
 318     is.getIndexReader().close();
 319     dir.close();
 320   }
 321 }
 322
 323 final class TestPayloadAnalyzer extends Analyzer {
 324
 325   @Override
 326   public TokenStream tokenStream(String fieldName, Reader reader) {
 327     TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
 328     return new PayloadFilter(result, fieldName);
 329   }
 330 }
 331
 332 final class PayloadFilter extends TokenFilter {
 333   String fieldName;
 334
 335   int pos;
 336
 337   int i;
 338
 339   final PositionIncrementAttribute posIncrAttr;
 340   final PayloadAttribute payloadAttr;
 341   final CharTermAttribute termAttr;
 342
 343   public PayloadFilter(TokenStream input, String fieldName) {
 344     super(input);
 345     this.fieldName = fieldName;
 346     pos = 0;
 347     i = 0;
 348     posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
 349     payloadAttr = input.addAttribute(PayloadAttribute.class);
 350     termAttr = input.addAttribute(CharTermAttribute.class);
 351   }
 352
 353   @Override
 354   public boolean incrementToken() throws IOException {
 355     if (input.incrementToken()) {
 356       payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
 357       int posIncr;
 358       if (i % 2 == 1) {
 359         posIncr = 1;
 360       } else {
 361         posIncr = 0;
 362       }
 363       posIncrAttr.setPositionIncrement(posIncr);
 364       pos += posIncr;
 365       if (TestPositionIncrement.VERBOSE) {
 366         System.out.println("term=" + termAttr + " pos=" + pos);
 367       }
 368       i++;
 369       return true;
 370     } else {
 371       return false;
 372     }
 373   }
 374 }