package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.Collection; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.search.Explanation.IDFExplanation; public class TestOmitTf extends LuceneTestCase { public static class SimpleSimilarity extends Similarity { @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } @Override public IDFExplanation idfExplain(Collection terms, Searcher searcher) throws IOException { return new IDFExplanation() { @Override public float getIdf() { return 1.0f; } @Override public String explain() { return "Inexplicable"; } }; } } // Tests whether the DocumentWriter correctly enable the // omitTermFreqAndPositions bit in the FieldInfo public void testOmitTermFreqAndPositions() throws Exception { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random); IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field will have Tf Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.add(f1); // this field will NOT have Tf Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); f2.setIndexOptions(IndexOptions.DOCS_ONLY); d.add(f2); writer.addDocument(d); writer.forceMerge(1); // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger // keep things constant d = new Document(); // Reverese f1.setIndexOptions(IndexOptions.DOCS_ONLY); d.add(f1); f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); d.add(f2); writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); FieldInfos fi = reader.fieldInfos(); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions); reader.close(); ram.close(); } // Tests whether merging of docs that have different // omitTermFreqAndPositions for the same field works public void testMixedMerge() throws Exception { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). setMaxBufferedDocs(3). setMergePolicy(newLogMergePolicy(2)) ); writer.setInfoStream(VERBOSE ? System.out : null); Document d = new Document(); // this field will have Tf Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.add(f1); // this field will NOT have Tf Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); f2.setIndexOptions(IndexOptions.DOCS_ONLY); d.add(f2); for(int i=0;i<30;i++) writer.addDocument(d); // now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger // keep things constant d = new Document(); // Reverese f1.setIndexOptions(IndexOptions.DOCS_ONLY); d.add(f1); f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); d.add(f2); for(int i=0;i<30;i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); FieldInfos fi = reader.fieldInfos(); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions); reader.close(); ram.close(); } // Make sure first adding docs that do not omitTermFreqAndPositions for // field X, then adding docs that do omitTermFreqAndPositions for that same // field, public void testMixedRAM() throws Exception { Directory ram = newDirectory(); Analyzer analyzer = new MockAnalyzer(random); IndexWriter writer = new IndexWriter( ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). setMaxBufferedDocs(10). setMergePolicy(newLogMergePolicy(2)) ); Document d = new Document(); // this field will have Tf Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); d.add(f1); // this field will NOT have Tf Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); d.add(f2); for(int i=0;i<5;i++) writer.addDocument(d); f2.setIndexOptions(IndexOptions.DOCS_ONLY); for(int i=0;i<20;i++) writer.addDocument(d); // force merge writer.forceMerge(1); // flush writer.close(); SegmentReader reader = SegmentReader.getOnlySegmentReader(ram); FieldInfos fi = reader.fieldInfos(); assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions); assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions); reader.close(); ram.close(); } private void assertNoPrx(Directory dir) throws Throwable { final String[] files = dir.listAll(); for(int i=0;i