package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.Explanation.IDFExplanation;
public class TestOmitTf extends LuceneTestCase {

  /** A Similarity that neutralizes every scoring factor except tf(). */
  public static class SimpleSimilarity extends Similarity {
    @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
    @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
    @Override public float tf(float freq) { return freq; }
    @Override public float sloppyFreq(int distance) { return 2.0f; }
    @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
    @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
      return new IDFExplanation() {
        @Override
        public float getIdf() {
          return 1.0f;
        }
        @Override
        public String explain() {
          return "Inexplicable";
        }
      };
    }
  }
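
  // Because SimpleSimilarity makes every factor except tf() a constant, a
  // TermQuery hit scores exactly its raw term frequency. Against a field
  // indexed with DOCS_ONLY the frequency is always reported as 1, so every
  // hit scores a flat 1.0. testBasic below relies on this.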
  // Tests whether the DocumentWriter correctly enables the
  // omitTermFreqAndPositions bit in the FieldInfo
  public void testOmitTermFreqAndPositions() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f2);

    writer.addDocument(d);

    // now we add another document that has term freqs for field f2 and not
    // for f1, and verify that the SegmentMerger keeps the options consistent
    d = new Document();

    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f2);

    writer.addDocument(d);

    // force merge and flush
    writer.forceMerge(1);
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }
  // Tests whether merging of docs that have different
  // omitTermFreqAndPositions for the same field works
  public void testMixedMerge() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(3).
            setMergePolicy(newLogMergePolicy(2))
    );
    writer.setInfoStream(VERBOSE ? System.out : null);
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    f2.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f2);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // now we add more documents that have term freqs for field f2 and not
    // for f1, and verify that the SegmentMerger keeps the options consistent
    d = new Document();

    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    d.add(f2);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // force merge and flush
    writer.forceMerge(1);
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }
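
  // Note: merging segments whose FieldInfos disagree about the same field
  // downgrades that field to the more restrictive DOCS_ONLY; the two asserts
  // above check this for both directions of the mismatch.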
  // Make sure that first adding docs that do not omitTermFreqAndPositions for
  // field X, then adding docs that do omitTermFreqAndPositions for that same
  // field, still downgrades that field to DOCS_ONLY
  public void testMixedRAM() throws Exception {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        ram,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(10).
            setMergePolicy(newLogMergePolicy(2))
    );
    Document d = new Document();

    // this field will have Tf
    Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    // this field will NOT have Tf
    Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f2);

    writer.addDocument(d);

    f2.setIndexOptions(IndexOptions.DOCS_ONLY);

    for(int i=0;i<20;i++)
      writer.addDocument(d);

    // force merge and flush
    writer.forceMerge(1);
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertEquals("OmitTermFreqAndPositions field bit should not be set.", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.fieldInfo("f1").indexOptions);
    assertEquals("OmitTermFreqAndPositions field bit should be set.", IndexOptions.DOCS_ONLY, fi.fieldInfo("f2").indexOptions);

    reader.close();
    ram.close();
  }
  private void assertNoPrx(Directory dir) throws Throwable {
    final String[] files = dir.listAll();
    for(int i=0;i<files.length;i++) {
      // no proximity/positions file should exist
      assertFalse(files[i].endsWith(".prx"));
      assertFalse(files[i].endsWith(".pos"));
    }
  }
  // Verifies no *.prx exists when all fields omit term freq:
  public void testNoPrxFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setUseCompoundFile(false);
    Document d = new Document();

    Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED);
    f1.setIndexOptions(IndexOptions.DOCS_ONLY);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    writer.commit();
    assertNoPrx(ram);

    // now add some documents with positions, and check
    // there is no prox after full merge
    d = new Document();
    f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED);
    d.add(f1);

    for(int i=0;i<30;i++)
      writer.addDocument(d);

    // force merge and flush
    writer.forceMerge(1);
    writer.close();

    assertNoPrx(ram);
    ram.close();
  }
  // Test scores with one field with Term Freqs and one without, otherwise with equal content.
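  // Each successive document appends one more "term" to a shared
  // StringBuilder, so doc i contains i+1 occurrences of "term": with
  // SimpleSimilarity the "tf" field should score 1.0+doc, while the
  // DOCS_ONLY "noTf" field should score a flat 1.0.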
  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
            setMaxBufferedDocs(2).
            setSimilarity(new SimpleSimilarity()).
            setMergePolicy(newLogMergePolicy(2))
    );
    writer.setInfoStream(VERBOSE ? System.out : null);

    StringBuilder sb = new StringBuilder(265);
    String term = "term";
    for(int i = 0; i<30; i++){
      Document d = new Document();
      sb.append(term).append(" ");
      String content = sb.toString();
      Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
      noTf.setIndexOptions(IndexOptions.DOCS_ONLY);
      d.add(noTf);

      Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
      d.add(tf);

      writer.addDocument(d);
      //System.out.println(d);
    }

    writer.forceMerge(1);
    // flush
    writer.close();

    // Verify the index
    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new SimpleSimilarity());

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    // q1: the DOCS_ONLY field scores a constant 1.0 for every doc
    searcher.search(q1,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q1: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertTrue(score==1.0f);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    // q2: the field with freqs scores doc i as its term frequency, i+1
    searcher.search(q2,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q2: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertEquals(1.0f+doc, score, 0.00001f);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    // q3: "notf" appears only in odd docs, always with freq 1
    searcher.search(q3,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("Q3: Doc=" + doc + " score=" + score);
                        float score = scorer.score();
                        assertTrue(score==1.0f);
                        assertFalse(doc%2==0);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    // q4: "tf" appears only in even docs, always with freq 1
    searcher.search(q4,
                    new CountingHitCollector() {
                      private Scorer scorer;
                      @Override
                      public final void setScorer(Scorer scorer) {
                        this.scorer = scorer;
                      }
                      @Override
                      public final void collect(int doc) throws IOException {
                        float score = scorer.score();
                        //System.out.println("Q4: Doc=" + doc + " score=" + score);
                        assertTrue(score==1.0f);
                        assertTrue(doc%2==0);
                        super.collect(doc);
                      }
                    });
    //System.out.println(CountingHitCollector.getCount());

    // q1 matches all 30 docs but q4 only the 15 even ones,
    // so the conjunction must match exactly 15 docs
    BooleanQuery bq = new BooleanQuery();
    bq.add(q1,Occur.MUST);
    bq.add(q4,Occur.MUST);

    searcher.search(bq,
                    new CountingHitCollector() {
                      @Override
                      public final void collect(int doc) throws IOException {
                        //System.out.println("BQ: Doc=" + doc + " score=" + score);
                        super.collect(doc);
                      }
                    });
    assertTrue(15 == CountingHitCollector.getCount());

    searcher.close();
    reader.close();
    dir.close();
  }
  public static class CountingHitCollector extends Collector {
    static int count=0;
    static int sum=0;
    private int docBase = -1;
    CountingHitCollector(){count=0;sum=0;}
    @Override public void setScorer(Scorer scorer) throws IOException {}
    @Override public void collect(int doc) throws IOException {
      count++;
      sum += doc + docBase; // use it to avoid any possibility of being merged away
    }
    public static int getCount() { return count; }
    public static int getSum() { return sum; }
    @Override public void setNextReader(IndexReader reader, int docBase) {
      this.docBase = docBase;
    }
    @Override public boolean acceptsDocsOutOfOrder() {
      return true;
    }
  }
}