+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Random;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-
-/**
- * Test that norms info is preserved during index life - including
- * separate norms, addDocument, addIndexes, optimize.
- */
-public class TestNorms extends LuceneTestCase {
-
- private class SimilarityOne extends DefaultSimilarity {
- @Override
- public float computeNorm(String fieldName, FieldInvertState state) {
- // Disable length norm
- return state.getBoost();
- }
- }
-
- private static final int NUM_FIELDS = 10;
-
- private Similarity similarityOne;
- private Analyzer anlzr;
- private int numDocNorms;
- private ArrayList<Float> norms;
- private ArrayList<Float> modifiedNorms;
- private float lastNorm = 0;
- private float normDelta = (float) 0.001;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- similarityOne = new SimilarityOne();
- anlzr = new MockAnalyzer(random);
- }
-
- /**
- * Test that norms values are preserved as the index is maintained.
- * Including separate norms.
- * Including merging indexes with seprate norms.
- * Including optimize.
- */
- public void testNorms() throws IOException {
- Directory dir1 = newDirectory();
-
- norms = new ArrayList<Float>();
- modifiedNorms = new ArrayList<Float>();
-
- createIndex(random, dir1);
- doTestNorms(random, dir1);
-
- // test with a single index: index2
- ArrayList<Float> norms1 = norms;
- ArrayList<Float> modifiedNorms1 = modifiedNorms;
- int numDocNorms1 = numDocNorms;
-
- norms = new ArrayList<Float>();
- modifiedNorms = new ArrayList<Float>();
- numDocNorms = 0;
-
- Directory dir2 = newDirectory();
-
- createIndex(random, dir2);
- doTestNorms(random, dir2);
-
- // add index1 and index2 to a third index: index3
- Directory dir3 = newDirectory();
-
- createIndex(random, dir3);
- IndexWriter iw = new IndexWriter(dir3, newIndexWriterConfig(
- TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
- .setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
- iw.addIndexes(new Directory[]{dir1,dir2});
- iw.optimize();
- iw.close();
-
- norms1.addAll(norms);
- norms = norms1;
- modifiedNorms1.addAll(modifiedNorms);
- modifiedNorms = modifiedNorms1;
- numDocNorms += numDocNorms1;
-
- // test with index3
- verifyIndex(dir3);
- doTestNorms(random, dir3);
-
- // now with optimize
- iw = new IndexWriter(dir3, newIndexWriterConfig( TEST_VERSION_CURRENT,
- anlzr).setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(5).setMergePolicy(newLogMergePolicy(3)));
- iw.optimize();
- iw.close();
- verifyIndex(dir3);
-
- dir1.close();
- dir2.close();
- dir3.close();
- }
-
- private void doTestNorms(Random random, Directory dir) throws IOException {
- int num = atLeast(1);
- for (int i=0; i<num; i++) {
- addDocs(random, dir,12,true);
- verifyIndex(dir);
- modifyNormsForF1(dir);
- verifyIndex(dir);
- addDocs(random, dir,12,false);
- verifyIndex(dir);
- modifyNormsForF1(dir);
- verifyIndex(dir);
- }
- }
-
- private void createIndex(Random random, Directory dir) throws IOException {
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.CREATE)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
- LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
- lmp.setMergeFactor(3);
- lmp.setUseCompoundFile(true);
- iw.close();
- }
-
- private void modifyNormsForF1(Directory dir) throws IOException {
- IndexReader ir = IndexReader.open(dir, false);
- int n = ir.maxDoc();
- for (int i = 0; i < n; i+=3) { // modify for every third doc
- int k = (i*3) % modifiedNorms.size();
- float origNorm = modifiedNorms.get(i).floatValue();
- float newNorm = modifiedNorms.get(k).floatValue();
- //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
- //System.out.println(" and: for "+k+" from "+newNorm+" to "+origNorm);
- modifiedNorms.set(i, Float.valueOf(newNorm));
- modifiedNorms.set(k, Float.valueOf(origNorm));
- ir.setNorm(i, "f"+1, newNorm);
- ir.setNorm(k, "f"+1, origNorm);
- }
- ir.close();
- }
-
-
- private void verifyIndex(Directory dir) throws IOException {
- IndexReader ir = IndexReader.open(dir, false);
- for (int i = 0; i < NUM_FIELDS; i++) {
- String field = "f"+i;
- byte b[] = ir.norms(field);
- assertEquals("number of norms mismatches",numDocNorms,b.length);
- ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
- for (int j = 0; j < b.length; j++) {
- float norm = similarityOne.decodeNormValue(b[j]);
- float norm1 = storedNorms.get(j).floatValue();
- assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
- }
- }
- ir.close();
- }
-
- private void addDocs(Random random, Directory dir, int ndocs, boolean compound) throws IOException {
- IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, anlzr).setOpenMode(OpenMode.APPEND)
- .setMaxBufferedDocs(5).setSimilarity(similarityOne).setMergePolicy(newLogMergePolicy()));
- LogMergePolicy lmp = (LogMergePolicy) iw.getConfig().getMergePolicy();
- lmp.setMergeFactor(3);
- lmp.setUseCompoundFile(compound);
- for (int i = 0; i < ndocs; i++) {
- iw.addDocument(newDoc());
- }
- iw.close();
- }
-
- // create the next document
- private Document newDoc() {
- Document d = new Document();
- float boost = nextNorm();
- for (int i = 0; i < 10; i++) {
- Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED);
- f.setBoost(boost);
- d.add(f);
- }
- return d;
- }
-
- // return unique norm values that are unchanged by encoding/decoding
- private float nextNorm() {
- float norm = lastNorm + normDelta;
- do {
- float norm1 = similarityOne.decodeNormValue(similarityOne.encodeNormValue(norm));
- if (norm1 > lastNorm) {
- //System.out.println(norm1+" > "+lastNorm);
- norm = norm1;
- break;
- }
- norm += normDelta;
- } while (true);
- norms.add(numDocNorms, Float.valueOf(norm));
- modifiedNorms.add(numDocNorms, Float.valueOf(norm));
- //System.out.println("creating norm("+numDocNorms+"): "+norm);
- numDocNorms ++;
- lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a ingle byte
- return norm;
- }
-
- class CustomNormEncodingSimilarity extends DefaultSimilarity {
- @Override
- public byte encodeNormValue(float f) {
- return (byte) f;
- }
-
- @Override
- public float decodeNormValue(byte b) {
- return (float) b;
- }
-
- @Override
- public float computeNorm(String field, FieldInvertState state) {
- return (float) state.getLength();
- }
- }
-
- // LUCENE-1260
- public void testCustomEncoder() throws Exception {
- Directory dir = newDirectory();
- IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
- config.setSimilarity(new CustomNormEncodingSimilarity());
- RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
- Document doc = new Document();
- Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
- Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
- doc.add(foo);
- doc.add(bar);
-
- for (int i = 0; i < 100; i++) {
- bar.setValue("singleton");
- writer.addDocument(doc);
- }
-
- IndexReader reader = writer.getReader();
- writer.close();
-
- byte fooNorms[] = reader.norms("foo");
- for (int i = 0; i < reader.maxDoc(); i++)
- assertEquals(0, fooNorms[i]);
-
- byte barNorms[] = reader.norms("bar");
- for (int i = 0; i < reader.maxDoc(); i++)
- assertEquals(1, barNorms[i]);
-
- reader.close();
- dir.close();
- }
-}