--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests the uniqueTermCount statistic in FieldInvertState
+ */
+public class TestUniqueTermCount extends LuceneTestCase {
+  /** Number of documents indexed by {@link #setUp()}. */
+  private static final int NUM_DOCS = 100;
+
+  Directory dir;
+  IndexReader reader;
+  /* expected uniqueTermCount values for our documents, in the order the documents were added */
+  final ArrayList<Integer> expected = new ArrayList<Integer>();
+
+  /**
+   * Indexes {@link #NUM_DOCS} documents with a single analyzed field "foo" whose
+   * content comes from {@link #addValue()}, using {@link TestSimilarity} so that
+   * the norm of "foo" carries the unique term count of each document.
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    dir = newDirectory();
+    // NOTE(review): test() compares expected.get(i) with the norm of docID i, so it
+    // relies on docIDs following insertion order; presumably that is why a log merge
+    // policy is configured here -- confirm.
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
+        new MockAnalyzer(random, MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
+    config.setSimilarity(new TestSimilarity());
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+    // One Document/Field pair is reused; only the field value changes per iteration.
+    Document doc = new Document();
+    Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(foo);
+    for (int i = 0; i < NUM_DOCS; i++) {
+      foo.setValue(addValue());
+      writer.addDocument(doc);
+    }
+    reader = writer.getReader();
+    writer.close();
+  }
+
+  /** Closes the reader and the directory opened by {@link #setUp()}. */
+  @Override
+  public void tearDown() throws Exception {
+    reader.close();
+    dir.close();
+    super.tearDown();
+  }
+
+  /**
+   * Checks that the norm stored for "foo" in every document equals the number of
+   * distinct terms recorded for that document by {@link #addValue()}.
+   */
+  public void test() throws Exception {
+    byte[] fooNorms = reader.norms("foo");
+    int maxDoc = reader.maxDoc();
+    // Guard against a vacuous pass: every expected value must have a document to check.
+    assertEquals("number of documents", expected.size(), maxDoc);
+    for (int i = 0; i < maxDoc; i++) {
+      // & 0xff reads the norm byte as an unsigned value.
+      assertEquals("doc " + i, expected.get(i).intValue(), fooNorms[i] & 0xff);
+    }
+  }
+
+  /**
+   * Builds a field value of 0 to 255 space-separated single-character tokens drawn
+   * from 'a'..'z' (so a value holds at most 26 distinct terms), and appends the
+   * number of distinct terms to {@link #expected} to be checked against the norm.
+   *
+   * @return the generated field value
+   */
+  private String addValue() {
+    StringBuilder sb = new StringBuilder();
+    HashSet<String> terms = new HashSet<String>();
+    int num = _TestUtil.nextInt(random, 0, 255);
+    for (int i = 0; i < num; i++) {
+      char term = (char) _TestUtil.nextInt(random, 'a', 'z');
+      sb.append(' ').append(term);
+      terms.add(String.valueOf(term));
+    }
+    expected.add(terms.size());
+    return sb.toString();
+  }
+
+  /**
+   * Simple similarity that encodes uniqueTermCount directly as a byte.
+   * Declared static because it does not use the enclosing test instance.
+   */
+  static class TestSimilarity extends DefaultSimilarity {
+
+    /** Stores the norm by casting the float straight to a byte. */
+    @Override
+    public byte encodeNormValue(float f) {
+      return (byte) f;
+    }
+
+    /** Uses the unique term count reported by the invert state as the norm. */
+    @Override
+    public float computeNorm(String field, FieldInvertState state) {
+      return state.getUniqueTermCount();
+    }
+  }
+}