lucene-java-3.5.0/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.analysis.Analyzer;
  21 import org.apache.lucene.analysis.MockAnalyzer;
  22 import org.apache.lucene.document.Document;
  23 import org.apache.lucene.document.Field;
  24 import org.apache.lucene.index.FieldInfo.IndexOptions;
  25 import org.apache.lucene.store.Directory;
  26 import org.apache.lucene.util.LuceneTestCase;
  27
  28 /**
  29  *
  30  * @lucene.experimental
  31  */
  32 public class TestOmitPositions extends LuceneTestCase {
  33
  34   public void testBasic() throws Exception {
  35     Directory dir = newDirectory();
  36     RandomIndexWriter w = new RandomIndexWriter(random, dir);
  37     Document doc = new Document();
  38     Field f = newField("foo", "this is a test test", Field.Index.ANALYZED);
  39     f.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  40     doc.add(f);
  41     for (int i = 0; i < 100; i++) {
  42       w.addDocument(doc);
  43     }
  44
  45     IndexReader reader = w.getReader();
  46     w.close();
  47
  48     TermPositions tp = reader.termPositions(new Term("foo", "test"));
  49     while (tp.next()) {
  50       assertEquals(2, tp.freq());
  51       assertEquals(0, tp.nextPosition());
  52       assertEquals(0, tp.nextPosition());
  53     }
  54
  55     TermDocs te = reader.termDocs(new Term("foo", "test"));
  56     while (te.next()) {
  57       assertEquals(2, te.freq());
  58     }
  59
  60     reader.close();
  61     dir.close();
  62   }
  63
  64   // Tests whether the DocumentWriter correctly enable the
  65   // omitTermFreqAndPositions bit in the FieldInfo
  66   public void testPositions() throws Exception {
  67     Directory ram = newDirectory();
  68     Analyzer analyzer = new MockAnalyzer(random);
  69     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
  70     Document d = new Document();
  71
  72     // f1,f2,f3: docs only
  73     Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  74     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
  75     d.add(f1);
  76
  77     Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  78     f2.setIndexOptions(IndexOptions.DOCS_ONLY);
  79     d.add(f2);
  80
  81     Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  82     f3.setIndexOptions(IndexOptions.DOCS_ONLY);
  83     d.add(f3);
  84
  85     // f4,f5,f6 docs and freqs
  86     Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  87     f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  88     d.add(f4);
  89
  90     Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  91     f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  92     d.add(f5);
  93
  94     Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  95     f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  96     d.add(f6);
  97
  98     // f7,f8,f9 docs/freqs/positions
  99     Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 100     f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 101     d.add(f7);
 102
 103     Field f8 = newField("f8", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 104     f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 105     d.add(f8);
 106
 107     Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 108     f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 109     d.add(f9);
 110
 111     writer.addDocument(d);
 112     writer.forceMerge(1);
 113
 114     // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
 115     // and docs/freqs/positions for f3, f6, f9
 116     d = new Document();
 117
 118     // f1,f4,f7: docs only
 119     f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 120     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
 121     d.add(f1);
 122
 123     f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 124     f4.setIndexOptions(IndexOptions.DOCS_ONLY);
 125     d.add(f4);
 126
 127     f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 128     f7.setIndexOptions(IndexOptions.DOCS_ONLY);
 129     d.add(f7);
 130
 131     // f2, f5, f8: docs and freqs
 132     f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 133     f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 134     d.add(f2);
 135
 136     f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 137     f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 138     d.add(f5);
 139
 140     f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 141     f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 142     d.add(f8);
 143
 144     // f3, f6, f9: docs and freqs and positions
 145     f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 146     f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 147     d.add(f3);
 148
 149     f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 150     f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 151     d.add(f6);
 152
 153     f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 154     f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 155     d.add(f9);
 156
 157     writer.addDocument(d);
 158
 159     // force merge
 160     writer.forceMerge(1);
 161     // flush
 162     writer.close();
 163
 164     SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
 165     FieldInfos fi = reader.fieldInfos();
 166     // docs + docs = docs
 167     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
 168     // docs + docs/freqs = docs
 169     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
 170     // docs + docs/freqs/pos = docs
 171     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
 172     // docs/freqs + docs = docs
 173     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
 174     // docs/freqs + docs/freqs = docs/freqs
 175     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
 176     // docs/freqs + docs/freqs/pos = docs/freqs
 177     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
 178     // docs/freqs/pos + docs = docs
 179     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
 180     // docs/freqs/pos + docs/freqs = docs/freqs
 181     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
 182     // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
 183     assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);
 184
 185     reader.close();
 186     ram.close();
 187   }
 188
 189   private void assertNoPrx(Directory dir) throws Throwable {
 190     final String[] files = dir.listAll();
 191     for(int i=0;i<files.length;i++) {
 192       assertFalse(files[i].endsWith(".prx"));
 193       assertFalse(files[i].endsWith(".pos"));
 194     }
 195   }
 196
 197   // Verifies no *.prx exists when all fields omit term positions:
 198   public void testNoPrxFile() throws Throwable {
 199     Directory ram = newDirectory();
 200     Analyzer analyzer = new MockAnalyzer(random);
 201     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
 202                                                                    TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
 203     LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
 204     lmp.setMergeFactor(2);
 205     lmp.setUseCompoundFile(false);
 206     Document d = new Document();
 207
 208     Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
 209     f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 210     d.add(f1);
 211
 212     for(int i=0;i<30;i++)
 213       writer.addDocument(d);
 214
 215     writer.commit();
 216
 217     assertNoPrx(ram);
 218
 219     // now add some documents with positions, and check there is no prox after optimization
 220     d = new Document();
 221     f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
 222     d.add(f1);
 223
 224     for(int i=0;i<30;i++)
 225       writer.addDocument(d);
 226
 227     // force merge
 228     writer.forceMerge(1);
 229     // flush
 230     writer.close();
 231
 232     assertNoPrx(ram);
 233     ram.close();
 234   }
 235 }