--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.Explanation.IDFExplanation;
+
+
+public class TestOmitTf extends LuceneTestCase {
+
+ /**
+  * Similarity used by these tests in which every factor is a constant except
+  * {@link #tf(float)}, which returns the raw frequency, and
+  * {@link #computeNorm(String, FieldInvertState)}, which returns the boost.
+  */
+ public static class SimpleSimilarity extends Similarity {
+
+   /** The norm is the boost reported by the invert state. */
+   @Override
+   public float computeNorm(String field, FieldInvertState state) {
+     return state.getBoost();
+   }
+
+   /** Query normalization is always 1. */
+   @Override
+   public float queryNorm(float sumOfSquaredWeights) {
+     return 1.0f;
+   }
+
+   /** The term-frequency factor is the frequency itself. */
+   @Override
+   public float tf(float freq) {
+     return freq;
+   }
+
+   /** Sloppy frequency is always 2, whatever the distance. */
+   @Override
+   public float sloppyFreq(int distance) {
+     return 2.0f;
+   }
+
+   /** Inverse document frequency is always 1. */
+   @Override
+   public float idf(int docFreq, int numDocs) {
+     return 1.0f;
+   }
+
+   /** Coordination factor is always 1. */
+   @Override
+   public float coord(int overlap, int maxOverlap) {
+     return 1.0f;
+   }
+
+   /** Returns an explanation whose idf is 1 and whose text is a fixed string. */
+   @Override
+   public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+     return new IDFExplanation() {
+       @Override
+       public float getIdf() {
+         return 1.0f;
+       }
+
+       @Override
+       public String explain() {
+         return "Inexplicable";
+       }
+     };
+   }
+ }
+
+ // Tests whether the DocumentWriter correctly enables the
+ // omitTermFreqAndPositions bit in the FieldInfo
+ public void testOmitTermFreqAndPositions() throws Exception {
+   Directory dir = newDirectory();
+   Analyzer mockAnalyzer = new MockAnalyzer(random);
+   IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, mockAnalyzer));
+   Document doc = new Document();
+
+   // f1: index options are left untouched here, so it is expected to keep term freqs
+   Field firstField = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+   doc.add(firstField);
+
+   // f2: explicitly indexed as docs-only (no term freqs, no positions)
+   Field secondField = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+   secondField.setIndexOptions(IndexOptions.DOCS_ONLY);
+   doc.add(secondField);
+
+   indexWriter.addDocument(doc);
+   indexWriter.forceMerge(1);
+
+   // Second document swaps the settings: f1 becomes docs-only while f2 asks
+   // for freqs and positions. The merged segment must still report docs-only
+   // for both fields.
+   doc = new Document();
+
+   // Reverse
+   firstField.setIndexOptions(IndexOptions.DOCS_ONLY);
+   doc.add(firstField);
+
+   secondField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+   doc.add(secondField);
+
+   indexWriter.addDocument(doc);
+   // merge everything down to one segment
+   indexWriter.forceMerge(1);
+   // closing the writer flushes it
+   indexWriter.close();
+
+   SegmentReader segmentReader = SegmentReader.getOnlySegmentReader(dir);
+   FieldInfos infos = segmentReader.fieldInfos();
+   assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, infos.fieldInfo("f1").indexOptions);
+   assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, infos.fieldInfo("f2").indexOptions);
+
+   segmentReader.close();
+   dir.close();
+ }
+
+ // Tests whether merging of docs that have different
+ // omitTermFreqAndPositions for the same field works
+ public void testMixedMerge() throws Exception {
+   Directory dir = newDirectory();
+   Analyzer mockAnalyzer = new MockAnalyzer(random);
+   // Small buffer (3 docs) and a merge factor of 2, so the 60 documents below
+   // are spread over several flushes before the final forced merge.
+   IndexWriter indexWriter = new IndexWriter(
+       dir,
+       newIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer)
+           .setMaxBufferedDocs(3)
+           .setMergePolicy(newLogMergePolicy(2))
+   );
+   indexWriter.setInfoStream(VERBOSE ? System.out : null);
+   Document doc = new Document();
+
+   // f1: index options are left untouched here, so it is expected to keep term freqs
+   Field firstField = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+   doc.add(firstField);
+
+   // f2: explicitly indexed as docs-only (no term freqs, no positions)
+   Field secondField = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+   secondField.setIndexOptions(IndexOptions.DOCS_ONLY);
+   doc.add(secondField);
+
+   for (int added = 0; added < 30; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   // Next batch swaps the settings: f1 becomes docs-only while f2 asks for
+   // freqs and positions. The merged segment must still report docs-only for
+   // both fields.
+   doc = new Document();
+
+   // Reverse
+   firstField.setIndexOptions(IndexOptions.DOCS_ONLY);
+   doc.add(firstField);
+
+   secondField.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+   doc.add(secondField);
+
+   for (int added = 0; added < 30; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   // merge everything down to one segment
+   indexWriter.forceMerge(1);
+   // closing the writer flushes it
+   indexWriter.close();
+
+   SegmentReader segmentReader = SegmentReader.getOnlySegmentReader(dir);
+   FieldInfos infos = segmentReader.fieldInfos();
+   assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, infos.fieldInfo("f1").indexOptions);
+   assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, infos.fieldInfo("f2").indexOptions);
+
+   segmentReader.close();
+   dir.close();
+ }
+
+ // Make sure that first adding docs that do not omitTermFreqAndPositions for
+ // field X, then adding docs that do omitTermFreqAndPositions for that same
+ // field, leaves the field with term freqs and positions omitted after merging
+ public void testMixedRAM() throws Exception {
+   Directory dir = newDirectory();
+   Analyzer mockAnalyzer = new MockAnalyzer(random);
+   // A buffer of 10 docs means the first 5 documents and some of the later
+   // docs-only documents are buffered together before a flush.
+   IndexWriter indexWriter = new IndexWriter(
+       dir,
+       newIndexWriterConfig(TEST_VERSION_CURRENT, mockAnalyzer)
+           .setMaxBufferedDocs(10)
+           .setMergePolicy(newLogMergePolicy(2))
+   );
+   Document doc = new Document();
+
+   // f1: index options are never changed in this test
+   Field firstField = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
+   doc.add(firstField);
+
+   // f2: starts with untouched index options and is switched to docs-only below
+   Field secondField = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
+   doc.add(secondField);
+
+   for (int added = 0; added < 5; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   // the same Document instance is re-added, now with f2 as docs-only
+   secondField.setIndexOptions(IndexOptions.DOCS_ONLY);
+
+   for (int added = 0; added < 20; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   // merge everything down to one segment
+   indexWriter.forceMerge(1);
+
+   // closing the writer flushes it
+   indexWriter.close();
+
+   SegmentReader segmentReader = SegmentReader.getOnlySegmentReader(dir);
+   FieldInfos infos = segmentReader.fieldInfos();
+   assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, infos.fieldInfo("f1").indexOptions);
+   assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, infos.fieldInfo("f2").indexOptions);
+
+   segmentReader.close();
+   dir.close();
+ }
+
+ /**
+  * Asserts that no file name listed by {@code dir} ends with ".prx" or ".pos".
+  */
+ private void assertNoPrx(Directory dir) throws Throwable {
+   for (String fileName : dir.listAll()) {
+     assertFalse(fileName.endsWith(".prx"));
+     assertFalse(fileName.endsWith(".pos"));
+   }
+ }
+
+ // Verifies no *.prx exists when all fields omit term freq:
+ public void testNoPrxFile() throws Throwable {
+   Directory dir = newDirectory();
+   Analyzer mockAnalyzer = new MockAnalyzer(random);
+   IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
+       TEST_VERSION_CURRENT, mockAnalyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
+   // Compound files are switched off so the per-extension files stay visible
+   // in the directory listing that assertNoPrx inspects.
+   LogMergePolicy mergePolicy = (LogMergePolicy) indexWriter.getConfig().getMergePolicy();
+   mergePolicy.setMergeFactor(2);
+   mergePolicy.setUseCompoundFile(false);
+   Document doc = new Document();
+
+   Field field = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
+   field.setIndexOptions(IndexOptions.DOCS_ONLY);
+   doc.add(field);
+
+   for (int added = 0; added < 30; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   indexWriter.commit();
+
+   assertNoPrx(dir);
+
+   // Same field name again, this time without restricting the index options;
+   // after the full merge there must still be no prox file.
+   doc = new Document();
+   field = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
+   doc.add(field);
+
+   for (int added = 0; added < 30; added++) {
+     indexWriter.addDocument(doc);
+   }
+
+   // merge everything down to one segment
+   indexWriter.forceMerge(1);
+   // closing the writer flushes it
+   indexWriter.close();
+
+   assertNoPrx(dir);
+   dir.close();
+ }
+
+ // Test scores with one field with Term Freqs and one without, otherwise with equal content
+ public void testBasic() throws Exception {
+   Directory dir = newDirectory();
+   Analyzer analyzer = new MockAnalyzer(random);
+   IndexWriter writer = new IndexWriter(
+       dir,
+       newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
+           setMaxBufferedDocs(2).
+           setSimilarity(new SimpleSimilarity()).
+           setMergePolicy(newLogMergePolicy(2))
+   );
+   writer.setInfoStream(VERBOSE ? System.out : null);
+
+   // Document i contains "term" i+1 times in both fields. Odd documents also
+   // carry "notf" in the docs-only field, and even documents carry "tf" in the
+   // field that keeps term freqs.
+   StringBuilder sb = new StringBuilder(265);
+   String term = "term";
+   for(int i = 0; i<30; i++){
+     Document d = new Document();
+     sb.append(term).append(" ");
+     String content = sb.toString();
+     Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
+     noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
+     d.add(noTf);
+
+     Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
+     d.add(tf);
+
+     writer.addDocument(d);
+   }
+
+   writer.forceMerge(1);
+   // flush
+   writer.close();
+
+   /*
+    * Verify the index
+    */
+   IndexReader reader = IndexReader.open(dir);
+   IndexSearcher searcher = new IndexSearcher(reader);
+   searcher.setSimilarity(new SimpleSimilarity());
+
+   Term a = new Term("noTf", term);
+   Term b = new Term("tf", term);
+   Term c = new Term("noTf", "notf");
+   Term d = new Term("tf", "tf");
+   TermQuery q1 = new TermQuery(a);
+   TermQuery q2 = new TermQuery(b);
+   TermQuery q3 = new TermQuery(c);
+   TermQuery q4 = new TermQuery(d);
+
+   // q1: "term" in the docs-only field. Every hit must score exactly 1.
+   searcher.search(q1,
+       new CountingHitCollector() {
+         private Scorer scorer;
+         @Override
+         public final void setScorer(Scorer scorer) {
+           this.scorer = scorer;
+         }
+         @Override
+         public final void collect(int doc) throws IOException {
+           float score = scorer.score();
+           // delta 0 keeps the exact-equality check but reports the actual score on failure
+           assertEquals(1.0f, score, 0.0f);
+           super.collect(doc);
+         }
+       });
+
+   // q2: "term" in the field with term freqs. The expected score grows with
+   // the doc id, since document i holds i+1 occurrences.
+   // NOTE(review): compares against the id passed to collect() without a doc
+   // base; presumably fine because the index was merged to one segment.
+   searcher.search(q2,
+       new CountingHitCollector() {
+         private Scorer scorer;
+         @Override
+         public final void setScorer(Scorer scorer) {
+           this.scorer = scorer;
+         }
+         @Override
+         public final void collect(int doc) throws IOException {
+           float score = scorer.score();
+           assertEquals(1.0f+doc, score, 0.00001f);
+           super.collect(doc);
+         }
+       });
+
+   // q3: "notf" in the docs-only field. Only odd documents, each scoring 1.
+   searcher.search(q3,
+       new CountingHitCollector() {
+         private Scorer scorer;
+         @Override
+         public final void setScorer(Scorer scorer) {
+           this.scorer = scorer;
+         }
+         @Override
+         public final void collect(int doc) throws IOException {
+           float score = scorer.score();
+           assertEquals(1.0f, score, 0.0f);
+           assertFalse("unexpected even doc " + doc, doc%2==0);
+           super.collect(doc);
+         }
+       });
+
+   // q4: "tf" in the field with term freqs. Only even documents, each scoring 1.
+   searcher.search(q4,
+       new CountingHitCollector() {
+         private Scorer scorer;
+         @Override
+         public final void setScorer(Scorer scorer) {
+           this.scorer = scorer;
+         }
+         @Override
+         public final void collect(int doc) throws IOException {
+           float score = scorer.score();
+           assertEquals(1.0f, score, 0.0f);
+           assertTrue("unexpected odd doc " + doc, doc%2==0);
+           super.collect(doc);
+         }
+       });
+
+   // Conjunction of a docs-only term and a term with freqs: only the 15 even
+   // documents contain both.
+   BooleanQuery bq = new BooleanQuery();
+   bq.add(q1,Occur.MUST);
+   bq.add(q4,Occur.MUST);
+
+   searcher.search(bq,
+       new CountingHitCollector() {
+         @Override
+         public final void collect(int doc) throws IOException {
+           super.collect(doc);
+         }
+       });
+   // assertEquals reports the actual hit count on failure
+   assertEquals(15, CountingHitCollector.getCount());
+
+   searcher.close();
+   reader.close();
+   dir.close();
+ }
+
+ /**
+  * Collector that tallies hits in static counters. Creating a new instance
+  * resets both counters, so {@link #getCount()} and {@link #getSum()} reflect
+  * the hits collected since the most recent instance was constructed.
+  */
+ public static class CountingHitCollector extends Collector {
+   static int count = 0;
+   static int sum = 0;
+   private int docBase = -1;
+
+   /** Resets the shared counters to zero. */
+   CountingHitCollector() {
+     count = 0;
+     sum = 0;
+   }
+
+   /** The scorer is ignored; subclasses override this when they need scores. */
+   @Override
+   public void setScorer(Scorer scorer) throws IOException {
+   }
+
+   /** Counts the hit and adds its doc id plus the current doc base to the sum. */
+   @Override
+   public void collect(int doc) throws IOException {
+     count += 1;
+     // the sum exists so the doc id is actually used and cannot be optimized away
+     sum += docBase + doc;
+   }
+
+   /** Number of hits collected since the counters were last reset. */
+   public static int getCount() {
+     return count;
+   }
+
+   /** Sum of (doc id + doc base) over the hits collected since the last reset. */
+   public static int getSum() {
+     return sum;
+   }
+
+   /** Remembers the doc base supplied for the reader about to be collected. */
+   @Override
+   public void setNextReader(IndexReader reader, int docBase) {
+     this.docBase = docBase;
+   }
+
+   /** Hits may be delivered in any order. */
+   @Override
+   public boolean acceptsDocsOutOfOrder() {
+     return true;
+   }
+ }
+}