lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/index/TestMaxTermFrequency.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.util.ArrayList;
  21 import java.util.Arrays;
  22 import java.util.Collections;
  23 import java.util.List;
  24
  25 import org.apache.lucene.analysis.MockAnalyzer;
  26 import org.apache.lucene.analysis.MockTokenizer;
  27 import org.apache.lucene.document.Document;
  28 import org.apache.lucene.document.Field;
  29 import org.apache.lucene.search.DefaultSimilarity;
  30 import org.apache.lucene.store.Directory;
  31 import org.apache.lucene.util.LuceneTestCase;
  32 import org.apache.lucene.util._TestUtil;
  33
  34 /**
  35  * Tests the maxTermFrequency statistic in FieldInvertState
  36  */
  37 public class TestMaxTermFrequency extends LuceneTestCase {
  38   Directory dir;
  39   IndexReader reader;
  40   /* expected maxTermFrequency values for our documents */
  41   ArrayList<Integer> expected = new ArrayList<Integer>();
  42
  43   @Override
  44   public void setUp() throws Exception {
  45     super.setUp();
  46     dir = newDirectory();
  47     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
  48                new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
  49     config.setSimilarity(new TestSimilarity());
  50     RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
  51     Document doc = new Document();
  52     Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
  53     doc.add(foo);
  54     for (int i = 0; i < 100; i++) {
  55       foo.setValue(addValue());
  56       writer.addDocument(doc);
  57     }
  58     reader = writer.getReader();
  59     writer.close();
  60   }
  61
  62   @Override
  63   public void tearDown() throws Exception {
  64     reader.close();
  65     dir.close();
  66     super.tearDown();
  67   }
  68
  69   public void test() throws Exception {
  70     byte fooNorms[] = reader.norms("foo");
  71     for (int i = 0; i < reader.maxDoc(); i++)
  72       assertEquals(expected.get(i).intValue(), fooNorms[i] & 0xff);
  73   }
  74
  75   /**
  76    * Makes a bunch of single-char tokens (the max freq will at most be 255).
  77    * shuffles them around, and returns the whole list with Arrays.toString().
  78    * This works fine because we use lettertokenizer.
  79    * puts the max-frequency term into expected, to be checked against the norm.
  80    */
  81   private String addValue() {
  82     List<String> terms = new ArrayList<String>();
  83     int maxCeiling = _TestUtil.nextInt(random, 0, 255);
  84     int max = 0;
  85     for (char ch = 'a'; ch <= 'z'; ch++) {
  86       int num = _TestUtil.nextInt(random, 0, maxCeiling);
  87       for (int i = 0; i < num; i++)
  88         terms.add(Character.toString(ch));
  89       max = Math.max(max, num);
  90     }
  91     expected.add(max);
  92     Collections.shuffle(terms, random);
  93     return Arrays.toString(terms.toArray(new String[terms.size()]));
  94   }
  95
  96   /**
  97    * Simple similarity that encodes maxTermFrequency directly as a byte
  98    */
  99   class TestSimilarity extends DefaultSimilarity {
 100
 101     @Override
 102     public byte encodeNormValue(float f) {
 103       return (byte) f;
 104     }
 105
 106     @Override
 107     public float decodeNormValue(byte b) {
 108       return (float) b;
 109     }
 110
 111     @Override
 112     public float computeNorm(String field, FieldInvertState state) {
 113       return (float) state.getMaxTermFrequency();
 114     }
 115   }
 116 }