1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collections;
23 import java.util.List;
24 import java.util.Random;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.analysis.MockTokenizer;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field.Index;
30 import org.apache.lucene.document.Field.Store;
31 import org.apache.lucene.search.IndexSearcher;
32 import org.apache.lucene.search.TermQuery;
33 import org.apache.lucene.search.TopDocs;
34 import org.apache.lucene.store.Directory;
35 import org.apache.lucene.store.IndexInput;
36 import org.apache.lucene.store.LockObtainFailedException;
37 import org.apache.lucene.util.BytesRef;
38 import org.apache.lucene.util.LuceneTestCase;
39 import org.apache.lucene.util._TestUtil;
/**
 * Tests {@link TermInfosReaderIndex}: builds a small multi-field index, opens the
 * .tis/.tii files directly, and checks that the in-memory term index agrees with a
 * straight enumeration of the term dictionary.
 */
public class TestTermInfosReaderIndex extends LuceneTestCase {
  
  private static final int NUMBER_OF_DOCUMENTS = 1000;
  private static final int NUMBER_OF_FIELDS = 100;
  // Term index under test, built in setUp() from the segment's .tii file.
  private TermInfosReaderIndex index;
  private Directory directory;
  // Enumerator over the full term dictionary (.tis), cloned by individual tests.
  private SegmentTermEnum termEnum;
  // Randomized in setUp(); every indexDivisor-th index entry is loaded.
  private int indexDivisor;
  // Interval between indexed terms, as reported by the IndexWriterConfig.
  private int termIndexInterval;
  private int readBufferSize = 1024;
  private IndexReader reader;
  // Random subset of the indexed terms, used by the search test.
  private List<Term> sampleTerms;
55 public void setUp() throws Exception {
57 indexDivisor = _TestUtil.nextInt(random, 1, 10);
58 directory = newDirectory();
59 termIndexInterval = populate(directory);
61 SegmentReader r = SegmentReader.getOnlySegmentReader(directory);
62 String segment = r.getSegmentName();
65 FieldInfos fieldInfos = new FieldInfos(directory, IndexFileNames.segmentFileName(segment, IndexFileNames.FIELD_INFOS_EXTENSION));
66 String segmentFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
67 long tiiFileLength = directory.fileLength(segmentFileName);
68 IndexInput input = directory.openInput(segmentFileName, readBufferSize);
69 termEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION), readBufferSize), fieldInfos, false);
70 int totalIndexInterval = termEnum.indexInterval * indexDivisor;
72 SegmentTermEnum indexEnum = new SegmentTermEnum(input, fieldInfos, true);
73 index = new TermInfosReaderIndex(indexEnum, indexDivisor, tiiFileLength, totalIndexInterval);
77 reader = IndexReader.open(directory);
78 sampleTerms = sample(reader,1000);
83 public void tearDown() throws Exception {
90 public void testSeekEnum() throws CorruptIndexException, IOException {
91 int indexPosition = 3;
92 SegmentTermEnum clone = (SegmentTermEnum) termEnum.clone();
93 Term term = findTermThatWouldBeAtIndex(clone, indexPosition);
94 SegmentTermEnum enumerator = clone;
95 index.seekEnum(enumerator, indexPosition);
96 assertEquals(term, enumerator.term());
100 public void testCompareTo() throws IOException {
101 Term term = new Term("field" + random.nextInt(NUMBER_OF_FIELDS) ,getText());
102 BytesRef termBytesRef = new BytesRef(term.text);
103 for (int i = 0; i < index.length(); i++) {
104 Term t = index.getTerm(i);
105 int compareTo = term.compareTo(t);
106 assertEquals(compareTo, index.compareTo(term, termBytesRef, i));
110 public void testRandomSearchPerformance() throws CorruptIndexException, IOException {
111 IndexSearcher searcher = new IndexSearcher(reader);
112 for (Term t : sampleTerms) {
113 TermQuery query = new TermQuery(t);
114 TopDocs topDocs = searcher.search(query, 10);
115 assertTrue(topDocs.totalHits > 0);
120 private List<Term> sample(IndexReader reader, int size) throws IOException {
121 List<Term> sample = new ArrayList<Term>();
122 Random random = new Random();
123 TermEnum terms = reader.terms();
124 while (terms.next()) {
125 if (sample.size() >= size) {
126 int pos = random.nextInt(size);
127 sample.set(pos, terms.term());
129 sample.add(terms.term());
133 Collections.shuffle(sample);
137 private Term findTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index) throws IOException {
138 int termPosition = index * termIndexInterval * indexDivisor;
139 for (int i = 0; i < termPosition; i++) {
140 if (!termEnum.next()) {
141 fail("Should not have run out of terms.");
144 return termEnum.term();
147 private int populate(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
148 IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
149 new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
150 // turn off compound file, this test will open some index files directly.
151 LogMergePolicy mp = newLogMergePolicy();
152 mp.setUseCompoundFile(false);
153 config.setMergePolicy(mp);
155 RandomIndexWriter writer = new RandomIndexWriter(random, directory, config);
156 for (int i = 0; i < NUMBER_OF_DOCUMENTS; i++) {
157 Document document = new Document();
158 for (int f = 0; f < NUMBER_OF_FIELDS; f++) {
159 document.add(newField("field" + f,getText(),Store.NO,Index.NOT_ANALYZED_NO_NORMS));
161 writer.addDocument(document);
163 writer.forceMerge(1);
165 return config.getTermIndexInterval();
168 private String getText() {
169 return Long.toString(random.nextLong(),Character.MAX_RADIX);