+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.search.Explanation.IDFExplanation;
-
-
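-/** Tests omitting term freqs and positions (IndexOptions.DOCS_ONLY) at indexing, merge, and search time. */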
-public class TestOmitTf extends LuceneTestCase {
-
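- /** A Similarity that neutralizes every scoring factor except tf, so a TermQuery's score equals the raw term frequency. */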
- public static class SimpleSimilarity extends Similarity {
- @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
- @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
- @Override public float tf(float freq) { return freq; }
- @Override public float sloppyFreq(int distance) { return 2.0f; }
- @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
- @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
- @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
- return new IDFExplanation() {
- @Override
- public float getIdf() {
- return 1.0f;
- }
- @Override
- public String explain() {
- return "Inexplicable";
- }
- };
- }
- }
-
- // Tests whether the DocumentWriter correctly enables the
- // omitTermFreqAndPositions bit in the FieldInfo
- public void testOmitTermFreqAndPositions() throws Exception {
- Directory ram = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
- IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
- Document d = new Document();
-
- // this field will have Tf
- Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f1);
-
- // this field will NOT have Tf
- Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
- f2.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(f2);
-
- writer.addDocument(d);
- writer.optimize();
- // now we add another document which has term freqs for field f2 and not for f1, and verify that the SegmentMerger
- // keeps things consistent
- d = new Document();
-
- // Reverse the index options of the two fields
- f1.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(f1);
-
- f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- d.add(f2);
-
- writer.addDocument(d);
- // force merge
- writer.optimize();
- // flush
- writer.close();
-
- SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
- FieldInfos fi = reader.fieldInfos();
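- // Once any document omits term freqs for a field, merging leaves that field's FieldInfo at DOCS_ONLY.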
- assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
- assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
-
- reader.close();
- ram.close();
- }
-
- // Tests whether merging segments whose docs have different
- // omitTermFreqAndPositions settings for the same field works
- public void testMixedMerge() throws Exception {
- Directory ram = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
- IndexWriter writer = new IndexWriter(
- ram,
- newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
- setMaxBufferedDocs(3).
- setMergePolicy(newLogMergePolicy(2))
- );
- writer.setInfoStream(VERBOSE ? System.out : null);
- Document d = new Document();
-
- // this field will have Tf
- Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f1);
-
- // this field will NOT have Tf
- Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
- f2.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(f2);
-
- for(int i=0;i<30;i++)
- writer.addDocument(d);
-
- // now we add another document which has term freqs for field f2 and not for f1, and verify that the SegmentMerger
- // keeps things consistent
- d = new Document();
-
- // Reverse the index options of the two fields
- f1.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(f1);
-
- f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- d.add(f2);
-
- for(int i=0;i<30;i++)
- writer.addDocument(d);
-
- // force merge
- writer.optimize();
- // flush
- writer.close();
-
- SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
- FieldInfos fi = reader.fieldInfos();
- assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
- assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
-
- reader.close();
- ram.close();
- }
-
- // Make sure that first adding docs that do not omit term freqs and
- // positions for field X, then adding docs that do omit them for that
- // same field, downgrades field X to DOCS_ONLY in the merged segment.
- public void testMixedRAM() throws Exception {
- Directory ram = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
- IndexWriter writer = new IndexWriter(
- ram,
- newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
- setMaxBufferedDocs(10).
- setMergePolicy(newLogMergePolicy(2))
- );
- Document d = new Document();
-
- // this field will have Tf
- Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f1);
-
- // this field will NOT have Tf
- Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f2);
-
- for(int i=0;i<5;i++)
- writer.addDocument(d);
-
- f2.setIndexOptions(IndexOptions.DOCS_ONLY);
-
- for(int i=0;i<20;i++)
- writer.addDocument(d);
-
- // force merge
- writer.optimize();
-
- // flush
- writer.close();
-
- SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
- FieldInfos fi = reader.fieldInfos();
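- // f1 never had term freqs omitted, so it keeps freqs and positions; f2 did, so it is downgraded to DOCS_ONLY.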
- assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
- assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);
-
- reader.close();
- ram.close();
- }
-
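- // Asserts that no proximity file exists (.prx in the pre-4.0 file format, .pos in newer codecs).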
- private void assertNoPrx(Directory dir) throws Throwable {
- final String[] files = dir.listAll();
- for(int i=0;i<files.length;i++) {
- assertFalse(files[i].endsWith(".prx"));
- assertFalse(files[i].endsWith(".pos"));
- }
- }
-
- // Verifies no *.prx exists when all fields omit term freq:
- public void testNoPrxFile() throws Throwable {
- Directory ram = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
- IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
- TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
- LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
- lmp.setMergeFactor(2);
- lmp.setUseCompoundFile(false);
- Document d = new Document();
-
- Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
- f1.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(f1);
-
- for(int i=0;i<30;i++)
- writer.addDocument(d);
-
- writer.commit();
-
- assertNoPrx(ram);
-
- // now add some documents with positions, and check that no prox file exists after optimization
- d = new Document();
- f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
- d.add(f1);
-
- for(int i=0;i<30;i++)
- writer.addDocument(d);
-
- // force merge
- writer.optimize();
- // flush
- writer.close();
-
- assertNoPrx(ram);
- ram.close();
- }
-
- // Test scores with one field with Term Freqs and one without, otherwise with equal content
- public void testBasic() throws Exception {
- Directory dir = newDirectory();
- Analyzer analyzer = new MockAnalyzer(random);
- IndexWriter writer = new IndexWriter(
- dir,
- newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
- setMaxBufferedDocs(2).
- setSimilarity(new SimpleSimilarity()).
- setMergePolicy(newLogMergePolicy(2))
- );
- writer.setInfoStream(VERBOSE ? System.out : null);
-
- StringBuilder sb = new StringBuilder(265);
- String term = "term";
- for(int i = 0; i<30; i++){
- Document d = new Document();
- sb.append(term).append(" ");
- String content = sb.toString();
- Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
- noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
- d.add(noTf);
-
- Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
- d.add(tf);
-
- writer.addDocument(d);
- //System.out.println(d);
- }
-
- writer.optimize();
- // flush
- writer.close();
-
- /*
- * Verify the index
- */
- Searcher searcher = new IndexSearcher(dir, true);
- searcher.setSimilarity(new SimpleSimilarity());
-
- Term a = new Term("noTf", term);
- Term b = new Term("tf", term);
- Term c = new Term("noTf", "notf");
- Term d = new Term("tf", "tf");
- TermQuery q1 = new TermQuery(a);
- TermQuery q2 = new TermQuery(b);
- TermQuery q3 = new TermQuery(c);
- TermQuery q4 = new TermQuery(d);
-
-
- searcher.search(q1,
- new CountingHitCollector() {
- private Scorer scorer;
- @Override
- public final void setScorer(Scorer scorer) {
- this.scorer = scorer;
- }
- @Override
- public final void collect(int doc) throws IOException {
- //System.out.println("Q1: Doc=" + doc + " score=" + score);
- float score = scorer.score();
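- // freqs are omitted for "noTf", so tf is pinned to 1 and every score is 1.0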
- assertTrue(score==1.0f);
- super.collect(doc);
- }
- });
- //System.out.println(CountingHitCollector.getCount());
-
-
- searcher.search(q2,
- new CountingHitCollector() {
- private Scorer scorer;
- @Override
- public final void setScorer(Scorer scorer) {
- this.scorer = scorer;
- }
- @Override
- public final void collect(int doc) throws IOException {
- //System.out.println("Q2: Doc=" + doc + " score=" + score);
- float score = scorer.score();
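- // doc i contains "term" (i+1) times; with SimpleSimilarity the score is the raw tf, hence 1+doc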
- assertEquals(1.0f+doc, score, 0.00001f);
- super.collect(doc);
- }
- });
- //System.out.println(CountingHitCollector.getCount());
-
-
- searcher.search(q3,
- new CountingHitCollector() {
- private Scorer scorer;
- @Override
- public final void setScorer(Scorer scorer) {
- this.scorer = scorer;
- }
- @Override
- public final void collect(int doc) throws IOException {
- //System.out.println("Q1: Doc=" + doc + " score=" + score);
- float score = scorer.score();
- assertTrue(score==1.0f);
- assertFalse(doc%2==0);
- super.collect(doc);
- }
- });
- //System.out.println(CountingHitCollector.getCount());
-
-
- searcher.search(q4,
- new CountingHitCollector() {
- private Scorer scorer;
- @Override
- public final void setScorer(Scorer scorer) {
- this.scorer = scorer;
- }
- @Override
- public final void collect(int doc) throws IOException {
- float score = scorer.score();
- //System.out.println("Q1: Doc=" + doc + " score=" + score);
- assertTrue(score==1.0f);
- assertTrue(doc%2==0);
- super.collect(doc);
- }
- });
- //System.out.println(CountingHitCollector.getCount());
-
-
- BooleanQuery bq = new BooleanQuery();
- bq.add(q1,Occur.MUST);
- bq.add(q4,Occur.MUST);
-
- searcher.search(bq,
- new CountingHitCollector() {
- @Override
- public final void collect(int doc) throws IOException {
- //System.out.println("BQ: Doc=" + doc + " score=" + score);
- super.collect(doc);
- }
- });
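- // q1 matches all 30 docs, while q4 ("tf":"tf") matches only the 15 even-numbered ones, so the conjunction collects 15 hits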
- assertTrue(15 == CountingHitCollector.getCount());
-
- searcher.close();
- dir.close();
- }
-
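- /** A Collector that tallies hits in static count/sum fields (reset in the constructor) so tests can read them after a search. */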
- public static class CountingHitCollector extends Collector {
- static int count=0;
- static int sum=0;
- private int docBase = -1;
- CountingHitCollector(){count=0;sum=0;}
- @Override
- public void setScorer(Scorer scorer) throws IOException {}
- @Override
- public void collect(int doc) throws IOException {
- count++;
- sum += doc + docBase; // use it to avoid any possibility of being optimized away
- }
-
- public static int getCount() { return count; }
- public static int getSum() { return sum; }
-
- @Override
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
- }
- @Override
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
- }
-}