lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestOmitTf.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.Collection;
  22
  23 import org.apache.lucene.util.LuceneTestCase;
  24 import org.apache.lucene.analysis.Analyzer;
  25 import org.apache.lucene.analysis.MockAnalyzer;
  26 import org.apache.lucene.document.Document;
  27 import org.apache.lucene.document.Field;
  28 import org.apache.lucene.index.FieldInfo.IndexOptions;
  29 import org.apache.lucene.search.*;
  30 import org.apache.lucene.search.BooleanClause.Occur;
  31 import org.apache.lucene.store.Directory;
  32 import org.apache.lucene.search.Explanation.IDFExplanation;
  33
  34
  35 public class TestOmitTf extends LuceneTestCase {
  36
  37   public static class SimpleSimilarity extends Similarity {
  38     @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
  39     @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
  40     @Override public float tf(float freq) { return freq; }
  41     @Override public float sloppyFreq(int distance) { return 2.0f; }
  42     @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
  43     @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
  44     @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
  45       return new IDFExplanation() {
  46         @Override
  47         public float getIdf() {
  48           return 1.0f;
  49         }
  50         @Override
  51         public String explain() {
  52           return "Inexplicable";
  53         }
  54       };
  55     }
  56   }
  57
  58   // Tests whether the DocumentWriter correctly enable the
  59   // omitTermFreqAndPositions bit in the FieldInfo
  60   public void testOmitTermFreqAndPositions() throws Exception {
  61     Directory ram = newDirectory();
  62     Analyzer analyzer = new MockAnalyzer(random);
  63     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
  64     Document d = new Document();
  65
  66     // this field will have Tf
  67     Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
  68     d.add(f1);
  69
  70     // this field will NOT have Tf
  71     Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
  72     f2.setIndexOptions(IndexOptions.DOCS_ONLY);
  73     d.add(f2);
  74
  75     writer.addDocument(d);
  76     writer.forceMerge(1);
  77     // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger
  78     // keep things constant
  79     d = new Document();
  80
  81     // Reverese
  82     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
  83     d.add(f1);
  84
  85     f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  86     d.add(f2);
  87
  88     writer.addDocument(d);
  89     // force merge
  90     writer.forceMerge(1);
  91     // flush
  92     writer.close();
  93
  94     SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
  95     FieldInfos fi = reader.fieldInfos();
  96     assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
  97     assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
  98
  99     reader.close();
 100     ram.close();
 101   }
 102
 103   // Tests whether merging of docs that have different
 104   // omitTermFreqAndPositions for the same field works
 105   public void testMixedMerge() throws Exception {
 106     Directory ram = newDirectory();
 107     Analyzer analyzer = new MockAnalyzer(random);
 108     IndexWriter writer = new IndexWriter(
 109         ram,
 110         newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
 111             setMaxBufferedDocs(3).
 112             setMergePolicy(newLogMergePolicy(2))
 113     );
 114     writer.setInfoStream(VERBOSE ? System.out : null);
 115     Document d = new Document();
 116
 117     // this field will have Tf
 118     Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
 119     d.add(f1);
 120
 121     // this field will NOT have Tf
 122     Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
 123     f2.setIndexOptions(IndexOptions.DOCS_ONLY);
 124     d.add(f2);
 125
 126     for(int i=0;i<30;i++)
 127       writer.addDocument(d);
 128
 129     // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger
 130     // keep things constant
 131     d = new Document();
 132
 133     // Reverese
 134     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
 135     d.add(f1);
 136
 137     f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 138     d.add(f2);
 139
 140     for(int i=0;i<30;i++)
 141       writer.addDocument(d);
 142
 143     // force merge
 144     writer.forceMerge(1);
 145     // flush
 146     writer.close();
 147
 148     SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
 149     FieldInfos fi = reader.fieldInfos();
 150     assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
 151     assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
 152
 153     reader.close();
 154     ram.close();
 155   }
 156
 157   // Make sure first adding docs that do not omitTermFreqAndPositions for
 158   // field X, then adding docs that do omitTermFreqAndPositions for that same
 159   // field,
 160   public void testMixedRAM() throws Exception {
 161     Directory ram = newDirectory();
 162     Analyzer analyzer = new MockAnalyzer(random);
 163     IndexWriter writer = new IndexWriter(
 164         ram,
 165         newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
 166             setMaxBufferedDocs(10).
 167             setMergePolicy(newLogMergePolicy(2))
 168     );
 169     Document d = new Document();
 170
 171     // this field will have Tf
 172     Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
 173     d.add(f1);
 174
 175     // this field will NOT have Tf
 176     Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
 177     d.add(f2);
 178
 179     for(int i=0;i<5;i++)
 180       writer.addDocument(d);
 181
 182     f2.setIndexOptions(IndexOptions.DOCS_ONLY);
 183
 184     for(int i=0;i<20;i++)
 185       writer.addDocument(d);
 186
 187     // force merge
 188     writer.forceMerge(1);
 189
 190     // flush
 191     writer.close();
 192
 193     SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
 194     FieldInfos fi = reader.fieldInfos();
 195     assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
 196     assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
 197
 198     reader.close();
 199     ram.close();
 200   }
 201
 202   private void assertNoPrx(Directory dir) throws Throwable {
 203     final String[] files = dir.listAll();
 204     for(int i=0;i<files.length;i++) {
 205       assertFalse(files[i].endsWith(".prx"));
 206       assertFalse(files[i].endsWith(".pos"));
 207     }
 208   }
 209
 210   // Verifies no *.prx exists when all fields omit term freq:
 211   public void testNoPrxFile() throws Throwable {
 212     Directory ram = newDirectory();
 213     Analyzer analyzer = new MockAnalyzer(random);
 214     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
 215                                                                    TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
 216     LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
 217     lmp.setMergeFactor(2);
 218     lmp.setUseCompoundFile(false);
 219     Document d = new Document();
 220
 221     Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
 222     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
 223     d.add(f1);
 224
 225     for(int i=0;i<30;i++)
 226       writer.addDocument(d);
 227
 228     writer.commit();
 229
 230     assertNoPrx(ram);
 231
 232     // now add some documents with positions, and check
 233     // there is no prox after full merge
 234     d = new Document();
 235     f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
 236     d.add(f1);
 237
 238     for(int i=0;i<30;i++)
 239       writer.addDocument(d);
 240
 241     // force merge
 242     writer.forceMerge(1);
 243     // flush
 244     writer.close();
 245
 246     assertNoPrx(ram);
 247     ram.close();
 248   }
 249
 250   // Test scores with one field with Term Freqs and one without, otherwise with equal content
 251   public void testBasic() throws Exception {
 252     Directory dir = newDirectory();
 253     Analyzer analyzer = new MockAnalyzer(random);
 254     IndexWriter writer = new IndexWriter(
 255         dir,
 256         newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
 257             setMaxBufferedDocs(2).
 258             setSimilarity(new SimpleSimilarity()).
 259             setMergePolicy(newLogMergePolicy(2))
 260     );
 261     writer.setInfoStream(VERBOSE ? System.out : null);
 262
 263     StringBuilder sb = new StringBuilder(265);
 264     String term = "term";
 265     for(int i = 0; i<30; i++){
 266       Document d = new Document();
 267       sb.append(term).append(" ");
 268       String content  = sb.toString();
 269       Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
 270       noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
 271       d.add(noTf);
 272
 273       Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
 274       d.add(tf);
 275
 276       writer.addDocument(d);
 277       //System.out.println(d);
 278     }
 279
 280     writer.forceMerge(1);
 281     // flush
 282     writer.close();
 283
 284     /*
 285      * Verify the index
 286      */
 287     IndexReader reader = IndexReader.open(dir);
 288     IndexSearcher searcher = new IndexSearcher(reader);
 289     searcher.setSimilarity(new SimpleSimilarity());
 290
 291     Term a = new Term("noTf", term);
 292     Term b = new Term("tf", term);
 293     Term c = new Term("noTf", "notf");
 294     Term d = new Term("tf", "tf");
 295     TermQuery q1 = new TermQuery(a);
 296     TermQuery q2 = new TermQuery(b);
 297     TermQuery q3 = new TermQuery(c);
 298     TermQuery q4 = new TermQuery(d);
 299
 300
 301     searcher.search(q1,
 302                     new CountingHitCollector() {
 303                       private Scorer scorer;
 304                       @Override
 305                       public final void setScorer(Scorer scorer) {
 306                         this.scorer = scorer;
 307                       }
 308                       @Override
 309                       public final void collect(int doc) throws IOException {
 310                         //System.out.println("Q1: Doc=" + doc + " score=" + score);
 311                         float score = scorer.score();
 312                         assertTrue(score==1.0f);
 313                         super.collect(doc);
 314                       }
 315                     });
 316     //System.out.println(CountingHitCollector.getCount());
 317
 318
 319     searcher.search(q2,
 320                     new CountingHitCollector() {
 321                       private Scorer scorer;
 322                       @Override
 323                       public final void setScorer(Scorer scorer) {
 324                         this.scorer = scorer;
 325                       }
 326                       @Override
 327                       public final void collect(int doc) throws IOException {
 328                         //System.out.println("Q2: Doc=" + doc + " score=" + score);
 329                         float score = scorer.score();
 330                         assertEquals(1.0f+doc, score, 0.00001f);
 331                         super.collect(doc);
 332                       }
 333                     });
 334     //System.out.println(CountingHitCollector.getCount());
 335
 336
 337
 338
 339
 340     searcher.search(q3,
 341                     new CountingHitCollector() {
 342                       private Scorer scorer;
 343                       @Override
 344                       public final void setScorer(Scorer scorer) {
 345                         this.scorer = scorer;
 346                       }
 347                       @Override
 348                       public final void collect(int doc) throws IOException {
 349                         //System.out.println("Q1: Doc=" + doc + " score=" + score);
 350                         float score = scorer.score();
 351                         assertTrue(score==1.0f);
 352                         assertFalse(doc%2==0);
 353                         super.collect(doc);
 354                       }
 355                     });
 356     //System.out.println(CountingHitCollector.getCount());
 357
 358
 359     searcher.search(q4,
 360                     new CountingHitCollector() {
 361                       private Scorer scorer;
 362                       @Override
 363                       public final void setScorer(Scorer scorer) {
 364                         this.scorer = scorer;
 365                       }
 366                       @Override
 367                       public final void collect(int doc) throws IOException {
 368                         float score = scorer.score();
 369                         //System.out.println("Q1: Doc=" + doc + " score=" + score);
 370                         assertTrue(score==1.0f);
 371                         assertTrue(doc%2==0);
 372                         super.collect(doc);
 373                       }
 374                     });
 375     //System.out.println(CountingHitCollector.getCount());
 376
 377
 378
 379     BooleanQuery bq = new BooleanQuery();
 380     bq.add(q1,Occur.MUST);
 381     bq.add(q4,Occur.MUST);
 382
 383     searcher.search(bq,
 384                     new CountingHitCollector() {
 385                       @Override
 386                       public final void collect(int doc) throws IOException {
 387                         //System.out.println("BQ: Doc=" + doc + " score=" + score);
 388                         super.collect(doc);
 389                       }
 390                     });
 391     assertTrue(15 == CountingHitCollector.getCount());
 392
 393     searcher.close();
 394     reader.close();
 395     dir.close();
 396   }
 397
 398   public static class CountingHitCollector extends Collector {
 399     static int count=0;
 400     static int sum=0;
 401     private int docBase = -1;
 402     CountingHitCollector(){count=0;sum=0;}
 403     @Override
 404     public void setScorer(Scorer scorer) throws IOException {}
 405     @Override
 406     public void collect(int doc) throws IOException {
 407       count++;
 408       sum += doc + docBase;  // use it to avoid any possibility of being merged away
 409     }
 410
 411     public static int getCount() { return count; }
 412     public static int getSum() { return sum; }
 413
 414     @Override
 415     public void setNextReader(IndexReader reader, int docBase) {
 416       this.docBase = docBase;
 417     }
 418     @Override
 419     public boolean acceptsDocsOutOfOrder() {
 420       return true;
 421     }
 422   }
 423 }