lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.analysis.Analyzer;
  21 import org.apache.lucene.analysis.MockAnalyzer;
  22 import org.apache.lucene.document.Document;
  23 import org.apache.lucene.document.Field;
  24 import org.apache.lucene.index.FieldInfo.IndexOptions;
  25 import org.apache.lucene.search.DocIdSetIterator;
  26 import org.apache.lucene.store.Directory;
  27 import org.apache.lucene.util.BytesRef;
  28 import org.apache.lucene.util.LuceneTestCase;
  29
  30 /**
  31  *
  32  * @lucene.experimental
  33  */
  34 public class TestOmitPositions extends LuceneTestCase {
  35
  36   public void testBasic() throws Exception {
  37     Directory dir = newDirectory();
  38     RandomIndexWriter w = new RandomIndexWriter(random, dir);
  39     Document doc = new Document();
  40     Field f = newField("foo", "this is a test test", Field.Index.ANALYZED);
  41     f.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  42     doc.add(f);
  43     for (int i = 0; i < 100; i++) {
  44       w.addDocument(doc);
  45     }
  46
  47     IndexReader reader = w.getReader();
  48     w.close();
  49
  50     TermPositions tp = reader.termPositions(new Term("foo", "test"));
  51     while (tp.next()) {
  52       assertEquals(2, tp.freq());
  53       assertEquals(0, tp.nextPosition());
  54       assertEquals(0, tp.nextPosition());
  55     }
  56
  57     TermDocs te = reader.termDocs(new Term("foo", "test"));
  58     while (te.next()) {
  59       assertEquals(2, te.freq());
  60     }
  61
  62     reader.close();
  63     dir.close();
  64   }
  65
  66   // Tests whether the DocumentWriter correctly enable the
  67   // omitTermFreqAndPositions bit in the FieldInfo
  68   public void testPositions() throws Exception {
  69     Directory ram = newDirectory();
  70     Analyzer analyzer = new MockAnalyzer(random);
  71     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
  72     Document d = new Document();
  73
  74     // f1,f2,f3: docs only
  75     Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  76     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
  77     d.add(f1);
  78
  79     Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  80     f2.setIndexOptions(IndexOptions.DOCS_ONLY);
  81     d.add(f2);
  82
  83     Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
  84     f3.setIndexOptions(IndexOptions.DOCS_ONLY);
  85     d.add(f3);
  86
  87     // f4,f5,f6 docs and freqs
  88     Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  89     f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  90     d.add(f4);
  91
  92     Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  93     f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  94     d.add(f5);
  95
  96     Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
  97     f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  98     d.add(f6);
  99
 100     // f7,f8,f9 docs/freqs/positions
 101     Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 102     f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 103     d.add(f7);
 104
 105     Field f8 = newField("f8", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 106     f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 107     d.add(f8);
 108
 109     Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 110     f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 111     d.add(f9);
 112
 113     writer.addDocument(d);
 114     writer.optimize();
 115
 116     // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
 117     // and docs/freqs/positions for f3, f6, f9
 118     d = new Document();
 119
 120     // f1,f4,f7: docs only
 121     f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 122     f1.setIndexOptions(IndexOptions.DOCS_ONLY);
 123     d.add(f1);
 124
 125     f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 126     f4.setIndexOptions(IndexOptions.DOCS_ONLY);
 127     d.add(f4);
 128
 129     f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED);
 130     f7.setIndexOptions(IndexOptions.DOCS_ONLY);
 131     d.add(f7);
 132
 133     // f2, f5, f8: docs and freqs
 134     f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 135     f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 136     d.add(f2);
 137
 138     f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 139     f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 140     d.add(f5);
 141
 142     f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED);
 143     f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 144     d.add(f8);
 145
 146     // f3, f6, f9: docs and freqs and positions
 147     f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 148     f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 149     d.add(f3);
 150
 151     f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 152     f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 153     d.add(f6);
 154
 155     f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED);
 156     f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
 157     d.add(f9);
 158
 159     writer.addDocument(d);
 160
 161     // force merge
 162     writer.optimize();
 163     // flush
 164     writer.close();
 165
 166     SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
 167     FieldInfos fi = reader.fieldInfos();
 168     // docs + docs = docs
 169     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
 170     // docs + docs/freqs = docs
 171     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
 172     // docs + docs/freqs/pos = docs
 173     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f3").indexOptions);
 174     // docs/freqs + docs = docs
 175     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f4").indexOptions);
 176     // docs/freqs + docs/freqs = docs/freqs
 177     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f5").indexOptions);
 178     // docs/freqs + docs/freqs/pos = docs/freqs
 179     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f6").indexOptions);
 180     // docs/freqs/pos + docs = docs
 181     assertEquals(IndexOptions.DOCS_ONLY, fi.fieldInfo("f7").indexOptions);
 182     // docs/freqs/pos + docs/freqs = docs/freqs
 183     assertEquals(IndexOptions.DOCS_AND_FREQS, fi.fieldInfo("f8").indexOptions);
 184     // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
 185     assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f9").indexOptions);
 186
 187     reader.close();
 188     ram.close();
 189   }
 190
 191   private void assertNoPrx(Directory dir) throws Throwable {
 192     final String[] files = dir.listAll();
 193     for(int i=0;i<files.length;i++) {
 194       assertFalse(files[i].endsWith(".prx"));
 195       assertFalse(files[i].endsWith(".pos"));
 196     }
 197   }
 198
 199   // Verifies no *.prx exists when all fields omit term positions:
 200   public void testNoPrxFile() throws Throwable {
 201     Directory ram = newDirectory();
 202     Analyzer analyzer = new MockAnalyzer(random);
 203     IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
 204                                                                    TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
 205     LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
 206     lmp.setMergeFactor(2);
 207     lmp.setUseCompoundFile(false);
 208     Document d = new Document();
 209
 210     Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
 211     f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 212     d.add(f1);
 213
 214     for(int i=0;i<30;i++)
 215       writer.addDocument(d);
 216
 217     writer.commit();
 218
 219     assertNoPrx(ram);
 220
 221     // now add some documents with positions, and check there is no prox after optimization
 222     d = new Document();
 223     f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
 224     d.add(f1);
 225
 226     for(int i=0;i<30;i++)
 227       writer.addDocument(d);
 228
 229     // force merge
 230     writer.optimize();
 231     // flush
 232     writer.close();
 233
 234     assertNoPrx(ram);
 235     ram.close();
 236   }
 237 }