pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / src / test / org / apache / lucene / index / TestOmitTf.java
diff --git a/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestOmitTf.java

new file mode 100644 (file)

index 0000000..16a6183
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
@@ -0,0 +1,423 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.Explanation.IDFExplanation;
+
+
+public class TestOmitTf extends LuceneTestCase {
+  
+  public static class SimpleSimilarity extends Similarity { // stub Similarity: every factor is a constant except tf() and computeNorm()
+    @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } // norm is the state's boost only
+    @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } // ignores its argument
+    @Override public float tf(float freq) { return freq; } // raw frequency, returned unchanged
+    @Override public float sloppyFreq(int distance) { return 2.0f; } // fixed value regardless of distance
+    @Override public float idf(int docFreq, int numDocs) { return 1.0f; } // fixed value regardless of the counts
+    @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } // fixed value regardless of overlap
+    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+      return new IDFExplanation() { // constant explanation; same 1.0f value that idf() returns
+        @Override
+        public float getIdf() {
+          return 1.0f;
+        }
+        @Override
+        public String explain() {
+          return "Inexplicable";
+        }
+      };
+    }
+  }
+
+  // Tests whether the DocumentWriter correctly enables the
+  // omitTermFreqAndPositions bit (checked here as IndexOptions.DOCS_ONLY) in the FieldInfo
+  public void testOmitTermFreqAndPositions() throws Exception {
+    Directory ram = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random);
+    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
+    Document d = new Document();
+        
+    // this field will have Tf in the first document
+    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f1);
+       
+    // this field will NOT have Tf in the first document
+    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
+    d.add(f2);
+        
+    writer.addDocument(d);
+    writer.forceMerge(1);
+    // now we add another document which has term freqs for field f2 and not for f1, and verify that the SegmentMerger
+    // keeps things constant
+    d = new Document();
+        
+    // Reverse: the same Field instances are reused with their index options swapped
+    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
+    d.add(f1);
+        
+    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);        
+    d.add(f2);
+        
+    writer.addDocument(d);
+    // force merge
+    writer.forceMerge(1);
+    // flush
+    writer.close();
+
+    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
+    FieldInfos fi = reader.fieldInfos(); // each field was DOCS_ONLY in one of the two docs; DOCS_ONLY is expected for both
+    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
+    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
+        
+    reader.close();
+    ram.close();
+  }
+ 
+  // Tests whether merging of docs that have different
+  // omitTermFreqAndPositions settings for the same field works
+  public void testMixedMerge() throws Exception {
+    Directory ram = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random);
+    IndexWriter writer = new IndexWriter(
+        ram,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
+            setMaxBufferedDocs(3). // NOTE(review): presumably keeps flushes small so the 60 docs span several segments - confirm
+            setMergePolicy(newLogMergePolicy(2))
+    );
+    writer.setInfoStream(VERBOSE ? System.out : null); // writer diagnostics go to stdout only when VERBOSE is set
+    Document d = new Document();
+        
+    // this field will have Tf in the first 30 documents
+    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f1);
+       
+    // this field will NOT have Tf in the first 30 documents
+    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
+    d.add(f2);
+
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+        
+    // now we add more documents which have term freqs for field f2 and not for f1, and verify that the SegmentMerger
+    // keeps things constant
+    d = new Document();
+        
+    // Reverse: the same Field instances are reused with their index options swapped
+    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
+    d.add(f1);
+        
+    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);        
+    d.add(f2);
+        
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+        
+    // force merge
+    writer.forceMerge(1);
+    // flush
+    writer.close();
+
+    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
+    FieldInfos fi = reader.fieldInfos(); // each field was DOCS_ONLY in one batch of 30 docs; DOCS_ONLY is expected for both
+    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
+    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
+        
+    reader.close();
+    ram.close();
+  }
+
+  // Make sure that first adding docs that do not omitTermFreqAndPositions for
+  // field X, then adding docs that do omitTermFreqAndPositions for that same
+  // field, leaves that field as DOCS_ONLY while an untouched field keeps freqs and positions
+  public void testMixedRAM() throws Exception {
+    Directory ram = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random);
+    IndexWriter writer = new IndexWriter(
+        ram,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
+            setMaxBufferedDocs(10). // NOTE(review): presumably lets both f2 variants share one in-memory buffer before a flush - confirm
+            setMergePolicy(newLogMergePolicy(2))
+    );
+    Document d = new Document();
+        
+    // this field will have Tf in every document
+    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f1);
+       
+    // this field keeps its default options for the first 5 docs and is switched to DOCS_ONLY below
+    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f2);
+
+    for(int i=0;i<5;i++)
+      writer.addDocument(d);
+
+    f2.setIndexOptions(IndexOptions.DOCS_ONLY); // same Document and Field instances are reused for the next 20 docs
+        
+    for(int i=0;i<20;i++)
+      writer.addDocument(d);
+
+    // force merge
+    writer.forceMerge(1);
+
+    // flush
+    writer.close();
+
+    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
+    FieldInfos fi = reader.fieldInfos(); // f1 was never changed; f2 was switched to DOCS_ONLY part-way through
+    assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
+    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
+        
+    reader.close();
+    ram.close();
+  }
+
+  private void assertNoPrx(Directory dir) throws Throwable { // fails if dir lists any file name ending in ".prx" or ".pos"
+    final String[] files = dir.listAll(); // one snapshot of the directory listing
+    for(int i=0;i<files.length;i++) {
+      assertFalse(files[i].endsWith(".prx"));
+      assertFalse(files[i].endsWith(".pos"));
+    }
+  }
+
+  // Verifies that no *.prx (or *.pos) file exists when all fields omit term freqs:
+  public void testNoPrxFile() throws Throwable {
+    Directory ram = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random);
+    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
+                                                                   TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
+    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
+    lmp.setMergeFactor(2);
+    lmp.setUseCompoundFile(false); // NOTE(review): presumably so individual index files stay visible to listAll() - confirm
+    Document d = new Document();
+        
+    Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
+    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
+    d.add(f1);
+
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+
+    writer.commit();
+
+    assertNoPrx(ram); // first check: only DOCS_ONLY documents have been committed so far
+    
+    // now add some documents with positions (a new f1 Field left at its default options), and check
+    // there is no prox file after a full merge
+    d = new Document();
+    f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
+    d.add(f1);
+    
+    for(int i=0;i<30;i++)
+      writer.addDocument(d);
+ 
+    // force merge
+    writer.forceMerge(1);
+    // flush
+    writer.close();
+
+    assertNoPrx(ram); // second check: after merging the DOCS_ONLY docs with the default-option docs
+    ram.close();
+  }
+ 
+  // Tests scores for one field with term freqs and one without; the two fields otherwise have equal content
+  public void testBasic() throws Exception {
+    Directory dir = newDirectory();  
+    Analyzer analyzer = new MockAnalyzer(random);
+    IndexWriter writer = new IndexWriter(
+        dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
+            setMaxBufferedDocs(2).
+            setSimilarity(new SimpleSimilarity()).
+            setMergePolicy(newLogMergePolicy(2))
+    );
+    writer.setInfoStream(VERBOSE ? System.out : null); // writer diagnostics go to stdout only when VERBOSE is set
+        
+    StringBuilder sb = new StringBuilder(265); // never reset, so the text grows by one "term " per document
+    String term = "term";
+    for(int i = 0; i<30; i++){ // document i carries "term" repeated i+1 times in both fields
+      Document d = new Document();
+      sb.append(term).append(" ");
+      String content  = sb.toString();
+      Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED); // odd i: " notf" is appended
+      noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
+      d.add(noTf);
+          
+      Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED); // even i: " tf" is appended
+      d.add(tf);
+          
+      writer.addDocument(d);
+      //System.out.println(d);
+    }
+        
+    writer.forceMerge(1);
+    // flush
+    writer.close();
+
+    /*
+     * Verify the index
+     */         
+    IndexReader reader = IndexReader.open(dir);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    searcher.setSimilarity(new SimpleSimilarity()); // same stub Similarity that was used at index time
+        
+    Term a = new Term("noTf", term);
+    Term b = new Term("tf", term);
+    Term c = new Term("noTf", "notf");
+    Term d = new Term("tf", "tf");
+    TermQuery q1 = new TermQuery(a);
+    TermQuery q2 = new TermQuery(b);
+    TermQuery q3 = new TermQuery(c);
+    TermQuery q4 = new TermQuery(d);
+
+        
+    searcher.search(q1,
+                    new CountingHitCollector() {
+                      private Scorer scorer;
+                      @Override
+                      public final void setScorer(Scorer scorer) {
+                        this.scorer = scorer;
+                      }
+                      @Override
+                      public final void collect(int doc) throws IOException {
+                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
+                        float score = scorer.score();
+                        assertTrue(score==1.0f); // noTf is DOCS_ONLY: a score of 1.0 is expected even though "term" repeats
+                        super.collect(doc);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+    searcher.search(q2,
+                    new CountingHitCollector() {
+                      private Scorer scorer;
+                      @Override
+                      public final void setScorer(Scorer scorer) {
+                        this.scorer = scorer;
+                      }
+                      @Override
+                      public final void collect(int doc) throws IOException {
+                        //System.out.println("Q2: Doc=" + doc + " score=" + score);
+                        float score = scorer.score();
+                        assertEquals(1.0f+doc, score, 0.00001f); // tf field: expected score is doc+1 (assumes doc ids follow insertion order)
+                        super.collect(doc);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+         
+        
+        
+        
+        
+    searcher.search(q3,
+                    new CountingHitCollector() {
+                      private Scorer scorer;
+                      @Override
+                      public final void setScorer(Scorer scorer) {
+                        this.scorer = scorer;
+                      }
+                      @Override
+                      public final void collect(int doc) throws IOException {
+                        //System.out.println("Q3: Doc=" + doc + " score=" + score);
+                        float score = scorer.score();
+                        assertTrue(score==1.0f);
+                        assertFalse(doc%2==0); // "notf" was appended only for odd i
+                        super.collect(doc);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+    searcher.search(q4,
+                    new CountingHitCollector() {
+                      private Scorer scorer;
+                      @Override
+                      public final void setScorer(Scorer scorer) {
+                        this.scorer = scorer;
+                      }
+                      @Override
+                      public final void collect(int doc) throws IOException {
+                        float score = scorer.score();
+                        //System.out.println("Q4: Doc=" + doc + " score=" + score);
+                        assertTrue(score==1.0f);
+                        assertTrue(doc%2==0); // "tf" was appended only for even i
+                        super.collect(doc);
+                      }
+                    });
+    //System.out.println(CountingHitCollector.getCount());
+        
+        
+        
+    BooleanQuery bq = new BooleanQuery(); // both clauses are MUST: "term" in noTf AND "tf" in tf
+    bq.add(q1,Occur.MUST);
+    bq.add(q4,Occur.MUST);
+        
+    searcher.search(bq,
+                    new CountingHitCollector() {
+                      @Override
+                      public final void collect(int doc) throws IOException {
+                        //System.out.println("BQ: Doc=" + doc + " score=" + score);
+                        super.collect(doc);
+                      }
+                    });
+    assertTrue(15 == CountingHitCollector.getCount()); // static count was reset by the last collector's constructor; 15 of the 30 docs have even i
+        
+    searcher.close(); 
+    reader.close();
+    dir.close();
+  }
+     
+  public static class CountingHitCollector extends Collector { // tallies hits in static fields shared by all instances
+    static int count=0; // number of collect() calls since the last constructor call
+    static int sum=0; // running total of doc + docBase since the last constructor call
+    private int docBase = -1; // overwritten by setNextReader
+    CountingHitCollector(){count=0;sum=0;} // creating any collector resets the shared counters
+    @Override
+    public void setScorer(Scorer scorer) throws IOException {} // scorer is ignored by this base collector
+    @Override
+    public void collect(int doc) throws IOException {
+      count++;
+      sum += doc + docBase;  // accumulate doc + docBase so the values are actually used (original note: avoids being "merged away")
+    }
+
+    public static int getCount() { return count; }
+    public static int getSum() { return sum; }
+    
+    @Override
+    public void setNextReader(IndexReader reader, int docBase) {
+      this.docBase = docBase; // the reader argument itself is not used
+    }
+    @Override
+    public boolean acceptsDocsOutOfOrder() {
+      return true;
+    }
+  }
+}