pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / src / test / org / apache / lucene / search / TestPositionIncrement.java
diff --git a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java

deleted file mode 100644 (file)

index 3b07e00..0000000
--- a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
+++ /dev/null
@@ -1,374 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Collection;
-import java.util.Collections;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositions;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.search.payloads.PayloadSpanUtil;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.LuceneTestCase;
-
-/**
- * Term position unit test.
- *
- *
- * @version $Revision: 1161586 $
- */
-public class TestPositionIncrement extends LuceneTestCase {
-
-  public void testSetPosition() throws Exception { // custom position increments must be indexed and honored by phrase queries
-    Analyzer analyzer = new Analyzer() { // ignores field name and reader; always yields the fixed tokens below
-      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new TokenStream() {
-          private final String[] TOKENS = {"1", "2", "3", "4", "5"};
-          private final int[] INCREMENTS = {0, 2, 1, 0, 1}; // position increment for the token at the same index
-          private int i = 0; // index of the next token to emit
-
-          PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-          CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-          
-          @Override
-          public boolean incrementToken() {
-            if (i == TOKENS.length)
-              return false; // every token has been emitted
-            clearAttributes();
-            termAtt.append(TOKENS[i]);
-            offsetAtt.setOffset(i,i);
-            posIncrAtt.setPositionIncrement(INCREMENTS[i]);
-            i++;
-            return true;
-          }
-
-          @Override
-          public void reset() throws IOException {
-            super.reset();
-            this.i = 0; // restart from the first token
-          }
-        };
-      }
-    };
-    Directory store = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer);
-    Document d = new Document();
-    d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED)); // "bogus" is never read by the analyzer above
-    writer.addDocument(d);
-    IndexReader reader = writer.getReader();
-    writer.close();
-    
-
-    IndexSearcher searcher = newSearcher(reader);
-    // with increments {0, 2, 1, 0, 1} the tokens "1".."5" are expected at positions 0, 2, 3, 3, 4
-    TermPositions pos = searcher.getIndexReader().termPositions(new Term("field", "1"));
-    pos.next();
-    // first token should be at position 0
-    assertEquals(0, pos.nextPosition());
-    
-    pos = searcher.getIndexReader().termPositions(new Term("field", "2"));
-    pos.next();
-    // second token should be at position 2
-    assertEquals(2, pos.nextPosition());
-    
-    PhraseQuery q;
-    ScoreDoc[] hits;
-    // terms added without explicit positions are searched as adjacent, but "2" is two positions after "1"
-    q = new PhraseQuery();
-    q.add(new Term("field", "1"));
-    q.add(new Term("field", "2"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // same as previous, just specify positions explicitly.
-    q = new PhraseQuery(); 
-    q.add(new Term("field", "1"),0);
-    q.add(new Term("field", "2"),1);
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // specifying correct positions should find the phrase.
-    q = new PhraseQuery();
-    q.add(new Term("field", "1"),0);
-    q.add(new Term("field", "2"),2);
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // "3" has increment 1 after "2", so the two are adjacent and the plain phrase matches.
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "3"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // "4" has increment 0, so it shares a position with "3" and the plain phrase "3 4" misses.
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"));
-    q.add(new Term("field", "4"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // phrase query would find it when correct positions are specified. 
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"),0);
-    q.add(new Term("field", "4"),0);
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    // phrase query should fail for a non-existing searched term
-    // even if another searched term exists at the same searched position.
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"),0);
-    q.add(new Term("field", "9"),0);
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // multi-phrase query should succeed despite the non-existing searched term
-    // because another searched term exists at the same searched position.
-    MultiPhraseQuery mq = new MultiPhraseQuery();
-    mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
-    hits = searcher.search(mq, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // "4" shares the position of "3", i.e. it sits directly after "2".
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "4"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // "5" has increment 1, so it directly follows the position shared by "3" and "4".
-    q = new PhraseQuery();
-    q.add(new Term("field", "3"));
-    q.add(new Term("field", "5"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // the same holds for "4", which shares that position.
-    q = new PhraseQuery();
-    q.add(new Term("field", "4"));
-    q.add(new Term("field", "5"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    // "5" is two positions after "2", so the plain phrase misses.
-    q = new PhraseQuery();
-    q.add(new Term("field", "2"));
-    q.add(new Term("field", "5"));
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // should not find "1 2" because there is a gap of 1 in the index
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                                     new StopWhitespaceAnalyzer(false));
-    q = (PhraseQuery) qp.parse("\"1 2\"");
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // omitted stop word cannot help because stop filter swallows the increments. 
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // query parser alone won't help, because stop filter swallows the increments. 
-    qp.setEnablePositionIncrements(true);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-
-    // stop filter alone won't help, because query parser swallows the increments. 
-    qp.setEnablePositionIncrements(false);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(0, hits.length);
-      
-    // when both qp and stopFilter propagate increments, we should find the doc.
-    qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                         new StopWhitespaceAnalyzer(true));
-    qp.setEnablePositionIncrements(true);
-    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
-    hits = searcher.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-    
-    searcher.close();
-    reader.close();
-    store.close();
-  }
-
-  private static class StopWhitespaceAnalyzer extends Analyzer { // wraps a WhitespaceAnalyzer stream in a StopFilter built from the single word "stop"
-    boolean enablePositionIncrements; // true: StopFilter gets TEST_VERSION_CURRENT; false: it gets Version.LUCENE_24
-    final WhitespaceAnalyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); // delegate that produces the unfiltered tokens
-    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
-      this.enablePositionIncrements = enablePositionIncrements;
-    }
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = a.tokenStream(fieldName,reader);
-      return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
-          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true)); // NOTE(review): the older Version presumably makes StopFilter drop position increments for removed words - confirm
-    }
-  }
-  
-  public void testPayloadsPos0() throws Exception { // positions and payloads when the first token has position increment 0
-    Directory dir = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new TestPayloadAnalyzer());
-    Document doc = new Document();
-    doc.add(new Field("content",
-                      new StringReader("a a b c d e a f g h i j a b k k")));
-    writer.addDocument(doc);
-
-    IndexReader r = writer.getReader();
-    // PayloadFilter alternates increments 0, 1, 0, 1, ...; "a" is token 0, 1, 6 and 12, hence positions 0, 1, 3, 6
-    TermPositions tp = r.termPositions(new Term("content", "a"));
-    int count = 0; // reassigned before each use below
-    assertTrue(tp.next());
-    // "a" occurs 4 times
-    assertEquals(4, tp.freq());
-    int expected = 0; // only used for the first position check
-    assertEquals(expected, tp.nextPosition());
-    assertEquals(1, tp.nextPosition());
-    assertEquals(3, tp.nextPosition());
-    assertEquals(6, tp.nextPosition());
-
-    // only one doc has "a"
-    assertFalse(tp.next());
-
-    IndexSearcher is = newSearcher(r);
-  
-    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
-    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
-    SpanQuery[] sqs = { stq1, stq2 };
-    SpanNearQuery snq = new SpanNearQuery(sqs, 30, false); // NOTE(review): arguments are presumably slop=30, inOrder=false - confirm
-
-    count = 0;
-    boolean sawZero = false;
-    //System.out.println("\ngetPayloadSpans test");
-    Spans pspans = snq.getSpans(is.getIndexReader());
-    while (pspans.next()) {
-      //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
-      Collection<byte[]> payloads = pspans.getPayload();
-      sawZero |= pspans.start() == 0; // remember whether any span starts at position 0
-      count += payloads.size(); // total number of payloads over all spans
-    }
-    assertEquals(5, count);
-    assertTrue(sawZero);
-
-    //System.out.println("\ngetSpans test");
-    Spans spans = snq.getSpans(is.getIndexReader());
-    count = 0;
-    sawZero = false;
-    while (spans.next()) {
-      count++; // here the spans themselves are counted, not their payloads
-      sawZero |= spans.start() == 0;
-      //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
-    }
-    assertEquals(4, count);
-    assertTrue(sawZero);
-  
-    //System.out.println("\nPayloadSpanUtil test");
-
-    sawZero = false;
-    PayloadSpanUtil psu = new PayloadSpanUtil(is.getIndexReader());
-    Collection<byte[]> pls = psu.getPayloadsForQuery(snq);
-    count = pls.size();
-    for (byte[] bytes : pls) {
-      String s = new String(bytes); // decoded with the platform default charset, like getBytes() in PayloadFilter
-      //System.out.println(s);
-      sawZero |= s.equals("pos: 0"); // payload text written by PayloadFilter
-    }
-    assertEquals(5, count);
-    assertTrue(sawZero);
-    writer.close();
-    is.getIndexReader().close();
-    dir.close();
-  }
-}
-
-final class TestPayloadAnalyzer extends Analyzer { // analyzer that feeds a LowerCaseTokenizer into a PayloadFilter
-  // Builds a new LowerCaseTokenizer -> PayloadFilter chain over the given reader on every call.
-  @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
-    return new PayloadFilter(result, fieldName);
-  }
-}
-
-final class PayloadFilter extends TokenFilter { // gives each token a "pos: N" payload and alternating position increments 0, 1, 0, 1, ...
-  String fieldName; // stored by the constructor; not read anywhere else in this class
-
-  int pos; // running sum of the position increments emitted so far
-
-  int i; // number of tokens passed through so far
-
-  final PositionIncrementAttribute posIncrAttr;
-  final PayloadAttribute payloadAttr;
-  final CharTermAttribute termAttr;
-
-  public PayloadFilter(TokenStream input, String fieldName) {
-    super(input);
-    this.fieldName = fieldName;
-    pos = 0;
-    i = 0;
-    posIncrAttr = input.addAttribute(PositionIncrementAttribute.class); // attributes are obtained from the wrapped stream
-    payloadAttr = input.addAttribute(PayloadAttribute.class);
-    termAttr = input.addAttribute(CharTermAttribute.class);
-  }
-  // NOTE(review): there is no reset() override, so pos and i keep their values if an instance is ever reused.
-  @Override
-  public boolean incrementToken() throws IOException {
-    if (input.incrementToken()) {
-      payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes())); // built from pos before this token's increment is added; platform default charset
-      int posIncr;
-      if (i % 2 == 1) { // odd-indexed tokens advance by one position, even-indexed ones by zero
-        posIncr = 1;
-      } else {
-        posIncr = 0;
-      }
-      posIncrAttr.setPositionIncrement(posIncr);
-      pos += posIncr;
-      if (TestPositionIncrement.VERBOSE) {
-        System.out.println("term=" + termAttr + " pos=" + pos); // prints pos after the increment, unlike the payload above
-      }
-      i++;
-      return true;
-    } else {
-      return false;
-    }
-  }
-}