1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Random;
24 import org.apache.lucene.analysis.Analyzer;
25 import org.apache.lucene.analysis.MockAnalyzer;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.Field;
28 import org.apache.lucene.document.Field.Index;
29 import org.apache.lucene.document.Field.Store;
30 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
31 import org.apache.lucene.search.DefaultSimilarity;
32 import org.apache.lucene.search.Similarity;
33 import org.apache.lucene.store.Directory;
34 import org.apache.lucene.util.LuceneTestCase;
37 * Test that norms info is preserved during index life - including
38 * separate norms, addDocument, addIndexes, forceMerge.
40 public class TestNorms extends LuceneTestCase {
// Similarity used by the norms tests below: its computeNorm ignores the field
// length and returns only the boost carried by the FieldInvertState.
42 private class SimilarityOne extends DefaultSimilarity {
// fieldName is not consulted; every field gets the same treatment.
44 public float computeNorm(String fieldName, FieldInvertState state) {
45 // Disable length norm
46 return state.getBoost();
// Number of fields ("f0", "f1", ...) whose norms verifyIndex checks.
50 private static final int NUM_FIELDS = 10;
// Similarity assigned in setUp; passed to the writers in createIndex/addDocs and
// used to encode/decode norm values in nextNorm and verifyIndex.
52 private Similarity similarityOne;
// Analyzer assigned in setUp and handed to every IndexWriterConfig in testNorms,
// createIndex and addDocs.
53 private Analyzer anlzr;
// Insertion index used by nextNorm for both lists, and the norms-array length
// that verifyIndex expects for each field.
54 private int numDocNorms;
// Expected norm per document for every field except "f1" (see verifyIndex).
55 private ArrayList<Float> norms;
// Expected norm per document for field "f1"; entries are swapped by modifyNormsForF1.
56 private ArrayList<Float> modifiedNorms;
// Last value recorded by nextNorm; nextNorm resets it to 0 once a norm exceeds 10.
57 private float lastNorm = 0;
// Step added to lastNorm to form the next candidate norm in nextNorm.
58 private float normDelta = (float) 0.001;
// Per-test fixture setup: creates the SimilarityOne instance and a MockAnalyzer
// built from the test's `random`.
61 public void setUp() throws Exception {
63 similarityOne = new SimilarityOne();
64 anlzr = new MockAnalyzer(random);
68 * Test that norms values are preserved as the index is maintained.
69 * Including separate norms.
70 * Including merging indexes with separate norms.
71 * Including forceMerge.
// Builds two independent indexes (dir1, dir2), exercises each with doTestNorms,
// then merges both into a third index (dir3) via addIndexes and exercises that
// one as well, combining the expected-norm bookkeeping along the way.
73 public void testNorms() throws IOException {
74 Directory dir1 = newDirectory();
// Fresh expected-value lists for index1.
76 norms = new ArrayList<Float>();
77 modifiedNorms = new ArrayList<Float>();
79 createIndex(random, dir1);
80 doTestNorms(random, dir1);
82 // test with a single index: index2
// Keep index1's expected values and doc count aside before starting index2.
83 ArrayList<Float> norms1 = norms;
84 ArrayList<Float> modifiedNorms1 = modifiedNorms;
85 int numDocNorms1 = numDocNorms;
// Fresh expected-value lists for index2.
87 norms = new ArrayList<Float>();
88 modifiedNorms = new ArrayList<Float>();
91 Directory dir2 = newDirectory();
93 createIndex(random, dir2);
94 doTestNorms(random, dir2);
96 // add index1 and index2 to a third index: index3
97 Directory dir3 = newDirectory();
99 createIndex(random, dir3);
// Reopen dir3 in APPEND mode and pull in dir1 followed by dir2.
100 IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
101 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
102 .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
103 iw.addIndexes(new Directory[]{dir1,dir2});
// Append index2's expected values after index1's (addIndexes above was given
// dir1 then dir2) and add the two document counts together.
107 norms1.addAll(norms);
109 modifiedNorms1.addAll(modifiedNorms);
110 modifiedNorms = modifiedNorms1;
111 numDocNorms += numDocNorms1;
115 doTestNorms(random, dir3);
117 // now with single segment
118 iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
119 anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
// Runs `num` rounds (num comes from atLeast(1)) against the index in `dir`.
// Each round adds 12 documents with the compound flag on, modifies the "f1"
// norms, adds 12 more documents with the compound flag off, and modifies the
// "f1" norms again.
129 private void doTestNorms(Random random, Directory dir) throws IOException {
130 int num = atLeast(1);
131 for (int i=0; i<num; i++) {
132 addDocs(random, dir,12,true);
134 modifyNormsForF1(dir);
136 addDocs(random, dir,12,false);
138 modifyNormsForF1(dir);
// Opens an IndexWriter on `dir` in CREATE mode, configured with the test
// analyzer, similarityOne, a max of 5 buffered docs and a log merge policy.
143 private void createIndex(Random random, Directory dir) throws IOException {
144 IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
145 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
146 .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
// Tune the writer's merge policy: merge factor 3, compound files enabled.
147 LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
148 lmp.setMergeFactor(3);
149 lmp.setUseCompoundFile(true);
// Swaps norm values of field "f1" between pairs of documents, both in the
// index (via IndexReader.setNorm) and in the expected-value list modifiedNorms,
// so that verifyIndex can later check them.
// NOTE(review): the declaration of `n` is not visible in this listing.
153 private void modifyNormsForF1(Directory dir) throws IOException {
// The `false` argument is presumably readOnly=false so setNorm is allowed — confirm.
154 IndexReader ir = IndexReader.open(dir, false);
156 for (int i = 0; i < n; i+=3) { // modify for every third doc
// Partner document whose expected norm is exchanged with document i.
157 int k = (i*3) % modifiedNorms.size();
158 float origNorm = modifiedNorms.get(i).floatValue();
159 float newNorm = modifiedNorms.get(k).floatValue();
160 //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
161 //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
// Update the expected values first, then write the same swap to the index.
162 modifiedNorms.set(i, Float.valueOf(newNorm));
163 modifiedNorms.set(k, Float.valueOf(origNorm));
// "f"+1 evaluates to the field name "f1".
164 ir.setNorm(i, "f"+1, newNorm);
165 ir.setNorm(k, "f"+1, origNorm);
// Checks the norms stored in the index at `dir` against the expected lists:
// for each of the NUM_FIELDS fields "f0".."f9" the norms array must have
// numDocNorms entries, and every decoded byte must match the expected value.
171 private void verifyIndex(Directory dir) throws IOException {
172 IndexReader ir = IndexReader.open(dir, false);
173 for (int i = 0; i < NUM_FIELDS; i++) {
174 String field = "f"+i;
175 byte b[] = ir.norms(field);
176 assertEquals("number of norms mismatches",numDocNorms,b.length);
// Field "f1" is the one modifyNormsForF1 touches, so it has its own expected list.
177 ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
178 for (int j = 0; j < b.length; j++) {
179 float norm = similarityOne.decodeNormValue(b[j]);
180 float norm1 = storedNorms.get(j).floatValue();
// NOTE(review): if this is JUnit's assertEquals(message, expected, actual, delta),
// the decoded index value is passed as "expected" and the stored list value as
// "actual" — confirm the intended order.
181 assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
// Appends `ndocs` documents produced by newDoc() to the index at `dir`.
// The writer is opened in APPEND mode with the test analyzer, similarityOne,
// a max of 5 buffered docs and a log merge policy; `compound` selects whether
// that merge policy uses compound files.
187 private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
188 IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
189 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
190 .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
// Tune the writer's merge policy: merge factor 3, compound setting from the caller.
191 LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
192 lmp.setMergeFactor(3);
193 lmp.setUseCompoundFile(compound);
194 for (int i = 0; i < ndocs; i++) {
195 iw.addDocument(newDoc());
200 // create the next document
// Builds one document: obtains a fresh norm value from nextNorm() as `boost`
// and creates fields "f0".."f9" with values "v0".."v9", not stored and not analyzed.
// NOTE(review): the loop bound is the literal 10, while verifyIndex iterates
// up to NUM_FIELDS (also 10) — the two must stay in sync.
201 private Document newDoc() {
202 Document d = new Document();
203 float boost = nextNorm();
204 for (int i = 0; i < 10; i++) {
205 Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
212 // return unique norm values that are unchanged by encoding/decoding
// Produces the next norm value and records it, at index numDocNorms, in both
// expected-value lists (norms and modifiedNorms).
213 private float nextNorm() {
// First candidate: one step above the previously recorded value.
214 float norm = lastNorm + normDelta;
// Round-trip the candidate through the similarity's byte encoding.
216 float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
// The round-tripped value must exceed the previous norm.
217 if (norm1 > lastNorm) {
218 //System.out.println(norm1+" > "+lastNorm);
224 norms.add(numDocNorms, Float.valueOf(norm));
225 modifiedNorms.add(numDocNorms, Float.valueOf(norm));
226 //System.out.println("creating norm("+numDocNorms+"): "+norm);
228 lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a single byte
// Similarity used by testCustomEncoder: it supplies its own encodeNormValue and
// decodeNormValue (their bodies are not visible in this listing) and computes
// the norm as the field length reported by the FieldInvertState.
232 class CustomNormEncodingSimilarity extends DefaultSimilarity {
234 public byte encodeNormValue(float f) {
239 public float decodeNormValue(byte b) {
// `field` is not consulted; the norm is the state's length cast to float.
244 public float computeNorm(String field, FieldInvertState state) {
245 return (float) state.getLength();
// Indexes documents with a CustomNormEncodingSimilarity and checks the raw norm
// bytes the reader returns for fields "foo" and "bar".
250 public void testCustomEncoder() throws Exception {
251 Directory dir = newDirectory();
252 IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
253 config.setSimilarity(new CustomNormEncodingSimilarity());
254 RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
255 Document doc = new Document();
// Both fields start with an empty value; "bar" is given its value in the loop below.
256 Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
257 Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
// Add the same Document instance 100 times, with "bar" set to "singleton".
261 for (int i = 0; i < 100; i++) {
262 bar.setValue("singleton");
263 writer.addDocument(doc);
266 IndexReader reader = writer.getReader();
// Every "foo" norm byte is expected to be 0 and every "bar" norm byte 1 —
// presumably because the custom similarity stores the field length directly as
// the norm byte; the encode/decode bodies are not visible here, so confirm.
269 byte fooNorms[] = reader.norms("foo");
270 for (int i = 0; i < reader.maxDoc(); i++)
271 assertEquals(0, fooNorms[i]);
273 byte barNorms[] = reader.norms("bar");
274 for (int i = 0; i < reader.maxDoc(); i++)
275 assertEquals(1, barNorms[i]);