1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Random;
24 import java.util.concurrent.atomic.AtomicInteger;
25 import org.apache.lucene.analysis.Analyzer;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.document.Field.Index;
30 import org.apache.lucene.document.Field.Store;
31 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
32 import org.apache.lucene.index.SegmentNorms;
33 import org.apache.lucene.search.DefaultSimilarity;
34 import org.apache.lucene.search.Similarity;
35 import org.apache.lucene.store.Directory;
36 import org.apache.lucene.util.LuceneTestCase;
39 * Tests cloning IndexReader norms
41 public class TestIndexReaderCloneNorms extends LuceneTestCase {
// Similarity used when building the test indexes: computeNorm returns the
// boost reported by the FieldInvertState and nothing else.
43 private class SimilarityOne extends DefaultSimilarity {
45 public float computeNorm(String fieldName, FieldInvertState state) {
// fieldName is not consulted; the norm is exactly the state's boost.
47 return state.getBoost();
// Number of fields ("f0".."f9") whose norms verifyIndex checks.
51 private static final int NUM_FIELDS = 10;
// Similarity handed to the IndexWriterConfig in createIndex and addDocs.
53 private Similarity similarityOne;
// Analyzer shared by every IndexWriterConfig built in this test.
55 private Analyzer anlzr;
// Expected number of norms per field; also the insertion index used by nextNorm.
57 private int numDocNorms;
// Expected norm per document for every field except "f1" (see verifyIndex).
59 private ArrayList<Float> norms;
// Expected norm per document for "f1"; kept in step with modifyNormsForF1.
61 private ArrayList<Float> modifiedNorms;
// Last norm handed out by nextNorm; reset to 0 there once a norm exceeds 10.
63 private float lastNorm = 0;
// Increment nextNorm adds to lastNorm to form the next candidate norm.
65 private float normDelta = (float) 0.001;
// Per-test fixture: creates the SimilarityOne instance and a MockAnalyzer
// built from the test's `random`.
68 public void setUp() throws Exception {
70 similarityOne = new SimilarityOne();
71 anlzr = new MockAnalyzer(random);
75 * Test that norms values are preserved as the index is maintained. Including
76 * separate norms. Including merging indexes with separate norms. Including
// Builds two independent indexes, runs doTestNorms on each, then adds both
// to a third index and runs doTestNorms on that one with the combined
// expectations.
79 public void testNorms() throws IOException {
80 // test with a single index: index1
81 Directory dir1 = newDirectory();
82 IndexWriter.unlock(dir1);
// Fresh expectation lists for index1.
84 norms = new ArrayList<Float>();
85 modifiedNorms = new ArrayList<Float>();
87 createIndex(random, dir1);
88 doTestNorms(random, dir1);
90 // test with a single index: index2
// Keep index1's expectations so they can be combined with the later ones.
91 ArrayList<Float> norms1 = norms;
92 ArrayList<Float> modifiedNorms1 = modifiedNorms;
93 int numDocNorms1 = numDocNorms;
// Start over with empty expectation lists for index2.
95 norms = new ArrayList<Float>();
96 modifiedNorms = new ArrayList<Float>();
99 Directory dir2 = newDirectory();
101 createIndex(random, dir2);
102 doTestNorms(random, dir2);
104 // add index1 and index2 to a third index: index3
105 Directory dir3 = newDirectory();
107 createIndex(random, dir3);
// Reopen index3 in APPEND mode and pull in the contents of dir1 and dir2.
108 IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
109 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
110 .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
111 iw.addIndexes(new Directory[] { dir1, dir2 });
// Combine expectations: the current lists are appended to the ones saved
// from index1, and the document counts are summed.
115 norms1.addAll(norms);
117 modifiedNorms1.addAll(modifiedNorms);
118 modifiedNorms = modifiedNorms1;
119 numDocNorms += numDocNorms1;
123 doTestNorms(random, dir3);
// A second APPEND-mode writer is opened on index3.
// NOTE(review): what is done with this writer is not visible here.
126 iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
127 anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
// Adds 12 documents to `dir`, then changes "f1" norms through a reader, a
// clone of that reader, and a clone of the clone.
137 // try cloning and reopening the norms
138 private void doTestNorms(Random random, Directory dir) throws IOException {
// compound == true is forwarded to the merge policy in addDocs.
139 addDocs(random, dir, 12, true);
// NOTE(review): the `false` argument is presumably readOnly=false, which
// setNorm would need - confirm against IndexReader.open.
140 IndexReader ir = IndexReader.open(dir, false);
142 modifyNormsForF1(ir);
143 IndexReader irc = (IndexReader) ir.clone();// IndexReader.open(dir, false);//ir.clone();
146 modifyNormsForF1(irc);
148 IndexReader irc3 = (IndexReader) irc.clone();
150 modifyNormsForF1(irc3);
// Checks the reference counter behind a segment's "field1" norms as a
// SegmentReader is cloned.
159 public void testNormsClose() throws IOException {
160 Directory dir1 = newDirectory();
161 TestIndexReaderReopen.createIndex(random, dir1, false);
162 SegmentReader reader1 = SegmentReader.getOnlySegmentReader(dir1);
// Request the norms first so an entry for "field1" can be fetched below.
163 reader1.norms("field1");
164 SegmentNorms r1norm = reader1.norms.get("field1");
// Hold on to the counter object so it can be read again later.
165 AtomicInteger r1BytesRef = r1norm.bytesRef();
166 SegmentReader reader2 = (SegmentReader)reader1.clone();
// After cloning, the counter is expected to be 2.
167 assertEquals(2, r1norm.bytesRef().get());
// Here the same counter is expected to be back at 1.
// NOTE(review): the statement that causes the drop is not visible here.
169 assertEquals(1, r1BytesRef.get());
170 reader2.norms("field1");
// Follows the "field1" norms reference counter through a chain of reader
// clones, and checks what happens to it once a clone changes a norm.
175 public void testNormsRefCounting() throws IOException {
176 Directory dir1 = newDirectory();
177 TestIndexReaderReopen.createIndex(random, dir1, false);
178 IndexReader reader1 = IndexReader.open(dir1, false);
180 IndexReader reader2C = (IndexReader) reader1.clone();
181 SegmentReader segmentReader2C = SegmentReader.getOnlySegmentReader(reader2C);
182 segmentReader2C.norms("field1"); // load the norms for the field
183 SegmentNorms reader2CNorm = segmentReader2C.norms.get("field1");
// With reader1 and its first clone in play, the counter is expected to be 2.
184 assertTrue("reader2CNorm.bytesRef()=" + reader2CNorm.bytesRef(), reader2CNorm.bytesRef().get() == 2);
// A further clone is expected to raise the counter to 3.
188 IndexReader reader3C = (IndexReader) reader2C.clone();
189 SegmentReader segmentReader3C = SegmentReader.getOnlySegmentReader(reader3C);
190 SegmentNorms reader3CCNorm = segmentReader3C.norms.get("field1");
191 assertEquals(3, reader3CCNorm.bytesRef().get());
193 // edit a norm and the refcount should be 1
194 IndexReader reader4C = (IndexReader) reader3C.clone();
195 SegmentReader segmentReader4C = SegmentReader.getOnlySegmentReader(reader4C);
// Before the edit, the fourth reader brings the expected count to 4.
196 assertEquals(4, reader3CCNorm.bytesRef().get());
197 reader4C.setNorm(5, "field1", 0.33f);
// NOTE(review): the comment below names reader1, but the visible call is
// made on reader3C.
199 // generate a cannot update exception in reader1
201 reader3C.setNorm(1, "field1", 0.99f);
// Reaching fail() means the setNorm call above did not throw.
202 fail("did not hit expected exception");
203 } catch (Exception ex) {
207 // norm values should be different
208 assertTrue(Similarity.getDefault().decodeNormValue(segmentReader3C.norms("field1")[5])
209 != Similarity.getDefault().decodeNormValue(segmentReader4C.norms("field1")[5]));
210 SegmentNorms reader4CCNorm = segmentReader4C.norms.get("field1");
// After reader4C's edit: 3 expected on reader3C's norms, 1 on reader4C's.
211 assertEquals(3, reader3CCNorm.bytesRef().get());
212 assertEquals(1, reader4CCNorm.bytesRef().get());
214 IndexReader reader5C = (IndexReader) reader4C.clone();
215 SegmentReader segmentReader5C = SegmentReader.getOnlySegmentReader(reader5C);
216 SegmentNorms reader5CCNorm = segmentReader5C.norms.get("field1");
// Once reader5C changes a norm, its counter is expected to be 1.
217 reader5C.setNorm(5, "field1", 0.7f);
218 assertEquals(1, reader5CCNorm.bytesRef().get());
// Opens an IndexWriter on `dir` in CREATE mode with maxBufferedDocs 5,
// similarityOne and a log merge policy, then sets compound-file use to true
// and the merge factor to 3 on that policy.
// NOTE(review): closing the writer is not visible here.
228 private void createIndex(Random random, Directory dir) throws IOException {
229 IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
230 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
231 .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
232 setUseCompoundFile(iw.getConfig().getMergePolicy(), true);
233 setMergeFactor(iw.getConfig().getMergePolicy(), 3);
// For every third document i, swaps the expected "f1" norm of i with that of
// document k in modifiedNorms and writes both new values through `ir`.
// NOTE(review): the declaration of `n` is not visible here; the commented
// println below suggests it is the reader's maxDoc - confirm.
237 private void modifyNormsForF1(IndexReader ir) throws IOException {
239 // System.out.println("modifyNormsForF1 maxDoc: "+n);
240 for (int i = 0; i < n; i += 3) { // modify for every third doc
// Swap partner, wrapped into the range of the expectation list.
241 int k = (i * 3) % modifiedNorms.size();
242 float origNorm = modifiedNorms.get(i).floatValue();
243 float newNorm = modifiedNorms.get(k).floatValue();
244 // System.out.println("Modifying: for "+i+" from "+origNorm+" to
246 // System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
// Update the expectations first, then apply the same swap to the reader.
247 modifiedNorms.set(i, Float.valueOf(newNorm));
248 modifiedNorms.set(k, Float.valueOf(origNorm));
// "f" + 1 evaluates to the field name "f1".
249 ir.setNorm(i, "f" + 1, newNorm);
250 ir.setNorm(k, "f" + 1, origNorm);
251 // System.out.println("setNorm i: "+i);
// Directory overload: opens an IndexReader on `dir`.
// NOTE(review): the rest of the body is not visible here; presumably it
// delegates to verifyIndex(IndexReader) and closes the reader - confirm.
257 private void verifyIndex(Directory dir) throws IOException {
258 IndexReader ir = IndexReader.open(dir, false);
// Checks, for each field "f0".."f9", that the reader holds numDocNorms norms
// and that each decoded norm matches the expected value for that document.
263 private void verifyIndex(IndexReader ir) throws IOException {
264 for (int i = 0; i < NUM_FIELDS; i++) {
265 String field = "f" + i;
266 byte b[] = ir.norms(field);
267 assertEquals("number of norms mismatches", numDocNorms, b.length);
// Field "f1" is checked against modifiedNorms; all other fields use norms.
268 ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
269 for (int j = 0; j < b.length; j++) {
270 float norm = Similarity.getDefault().decodeNormValue(b[j]);
271 float norm1 = storedNorms.get(j).floatValue();
// Decoded and expected values must agree within 0.000001.
272 assertEquals("stored norm value of " + field + " for doc " + j + " is "
273 + norm + " - a mismatch!", norm, norm1, 0.000001);
// Opens `dir` in APPEND mode (maxBufferedDocs 5, similarityOne, log merge
// policy with merge factor 3 and compound-file use set from `compound`) and
// adds `ndocs` documents produced by newDoc().
// NOTE(review): closing the writer is not visible here.
278 private void addDocs(Random random, Directory dir, int ndocs, boolean compound)
280 IndexWriterConfig conf = newIndexWriterConfig(
281 TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
282 .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy());
// The cast relies on newLogMergePolicy() having supplied a LogMergePolicy.
283 LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
284 lmp.setMergeFactor(3);
285 lmp.setUseCompoundFile(compound);
286 IndexWriter iw = new IndexWriter(dir, conf);
287 for (int i = 0; i < ndocs; i++) {
288 iw.addDocument(newDoc());
// Builds one document with ten fields "f0".."f9" holding values "v0".."v9",
// not stored and not analyzed, after drawing a value from nextNorm().
// NOTE(review): how `boost` is applied to the fields is not visible here.
293 // create the next document
294 private Document newDoc() {
295 Document d = new Document();
// nextNorm() also records the value in the expectation lists.
296 float boost = nextNorm();
297 for (int i = 0; i < 10; i++) {
298 Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED);
// Produces the next norm value and records it at index numDocNorms in both
// norms and modifiedNorms.
// NOTE(review): the loop around the comparison, the numDocNorms increment
// and the return statement are not visible here.
305 // return unique norm values that are unchanged by encoding/decoding
306 private float nextNorm() {
// First candidate: one normDelta step above the previous norm.
307 float norm = lastNorm + normDelta;
// Round-trip the candidate through the default Similarity's encoding.
309 float norm1 = Similarity.getDefault().decodeNormValue(
310 Similarity.getDefault().encodeNormValue(norm));
// Compare the round-tripped value with the previous norm.
311 if (norm1 > lastNorm) {
312 // System.out.println(norm1+" > "+lastNorm);
318 norms.add(numDocNorms, Float.valueOf(norm));
319 modifiedNorms.add(numDocNorms, Float.valueOf(norm));
320 // System.out.println("creating norm("+numDocNorms+"): "+norm);
// Start again from 0 once the norm has grown past 10.
322 lastNorm = (norm > 10 ? 0 : norm); // there's a limit to how many distinct
323 // values can be stored in a single byte