1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Closeable;
21 import java.io.IOException;
23 import org.apache.lucene.store.Directory;
24 import org.apache.lucene.util.DoubleBarrelLRUCache;
25 import org.apache.lucene.util.CloseableThreadLocal;
27 /** This stores a monotonically increasing set of {@code <Term, TermInfo>} pairs in a
28 * Directory. Pairs are accessed either by Term or by ordinal position in the set. */
31 final class TermInfosReader implements Closeable {
// Directory and segment name from which the terms files are opened.
32 private final Directory directory;
33 private final String segment;
// Passed to every SegmentTermEnum this reader creates.
34 private final FieldInfos fieldInfos;
// Holds one ThreadResources (and therefore one SegmentTermEnum) per calling thread.
36 private final CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>();
// Enum over the terms file; terms() hands out clones of it rather than the enum itself.
37 private final SegmentTermEnum origEnum;
// Copied from origEnum.size when the reader is constructed.
38 private final long size;
// In-memory terms index, filled from the index enum in the constructor.
// indexPointers is set to null when the index is not loaded (indexDivisor == -1).
40 private final Term[] indexTerms;
41 private final TermInfo[] indexInfos;
42 private final long[] indexPointers;
// origEnum.indexInterval * indexDivisor, or -1 when the terms index is not loaded.
44 private final int totalIndexInterval;
// Value passed to the DoubleBarrelLRUCache constructor for termsCache.
46 private final static int DEFAULT_CACHE_SIZE = 1024;
48 // TermInfo plus the term's ordinal, so a cached entry also records the enum position it was read at
49 private final static class TermInfoAndOrd extends TermInfo {
// NOTE(review): the termOrd field declaration is not visible in this view — confirm it exists on this class.
51 public TermInfoAndOrd(TermInfo ti, long termOrd) {
// NOTE(review): how ti's values are copied into this object is not visible in this view.
54 this.termOrd = termOrd;
// Key type for termsCache: wraps a Term so it can serve as a DoubleBarrelLRUCache.CloneableKey.
58 private static class CloneableTerm extends DoubleBarrelLRUCache.CloneableKey {
59 private final Term term;
// Builds a new Term from t's field and text instead of keeping a reference to t.
61 public CloneableTerm(Term t) {
62 this.term = new Term(t.field(), t.text());
// Cloning goes through the constructor, so the clone gets its own Term copy as well.
66 public Object clone() {
67 return new CloneableTerm(term);
// NOTE(review): the cast is unchecked — a null or non-CloneableTerm argument throws instead of returning false.
71 public boolean equals(Object _other) {
72 CloneableTerm other = (CloneableTerm) _other;
73 return term.equals(other.term);
// Delegates to the wrapped Term, the same object equals() compares.
77 public int hashCode() {
78 return term.hashCode();
// Cache from a copied Term key to its TermInfo and ordinal; read and filled by get(Term, boolean).
82 private final DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(DEFAULT_CACHE_SIZE);
85 * Per-thread resources managed by ThreadLocal
// Holder stored in the threadResources thread-local; one instance per calling thread.
87 private static final class ThreadResources {
// This thread's own enum; getThreadResources() initialises it from terms(), i.e. a clone of origEnum.
88 SegmentTermEnum termEnum;
/**
 * Opens the terms file of a segment and, unless indexDivisor is -1, loads every
 * indexDivisor'th entry of the terms index into indexTerms/indexInfos/indexPointers.
 *
 * @param readBufferSize buffer size passed to directory.openInput for both files
 * @param indexDivisor -1 to skip loading the terms index, otherwise a value greater than 0
 * @throws IllegalArgumentException if indexDivisor is neither -1 nor greater than 0
 */
// NOTE(review): the assignments of dir/seg/fis to the directory/segment/fieldInfos fields are not visible in this view.
91 TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
92 throws CorruptIndexException, IOException {
93 boolean success = false;
95 if (indexDivisor < 1 && indexDivisor != -1) {
96 throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
// Main terms file; the trailing 'false' contrasts with the 'true' passed for the index file below.
104 origEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION),
105 readBufferSize), fieldInfos, false);
106 size = origEnum.size;
109 if (indexDivisor != -1) {
111 totalIndexInterval = origEnum.indexInterval * indexDivisor;
112 final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION),
113 readBufferSize), fieldInfos, true);
// Entries kept after sub-sampling: indexEnum.size / indexDivisor rounded up (for a non-empty index).
116 int indexSize = 1+((int)indexEnum.size-1)/indexDivisor; // otherwise read index
118 indexTerms = new Term[indexSize];
119 indexInfos = new TermInfo[indexSize];
120 indexPointers = new long[indexSize];
// Keep one index entry, then advance past the next indexDivisor - 1 entries.
122 for (int i = 0; indexEnum.next(); i++) {
123 indexTerms[i] = indexEnum.term();
124 indexInfos[i] = indexEnum.termInfo();
125 indexPointers[i] = indexEnum.indexPointer;
127 for (int j = 1; j < indexDivisor; j++)
128 if (!indexEnum.next())
135 // Do not load terms index:
136 totalIndexInterval = -1;
139 indexPointers = null;
143 // With lock-less commits, it's entirely possible (and
144 // fine) to hit a FileNotFound exception above. In
145 // this case, we want to explicitly close any subset
146 // of things that were opened so that we don't have to
147 // wait for a GC to do so.
/** Returns the skipInterval value held by the enum over the terms file (origEnum). */
154 public int getSkipInterval() {
155 return origEnum.skipInterval;
/** Returns the maxSkipLevels value held by the enum over the terms file (origEnum). */
158 public int getMaxSkipLevels() {
159 return origEnum.maxSkipLevels;
/** Releases this reader's resources, including the per-thread enums held in threadResources. */
162 public final void close() throws IOException {
// NOTE(review): the statement guarded by this null check is not visible in this view
// (original line 164 is missing) — presumably it closes origEnum; confirm against the full file.
163 if (origEnum != null)
165 threadResources.close();
168 /** Returns the number of term/value pairs in the set. */
/** Looks up the calling thread's ThreadResources, creating and registering one on first use. */
173 private ThreadResources getThreadResources() {
174 ThreadResources resources = threadResources.get();
175 if (resources == null) {
176 resources = new ThreadResources();
// terms() clones origEnum, so each thread gets its own enum.
177 resources.termEnum = terms();
178 threadResources.set(resources);
184 /** Returns the offset of the greatest index entry which is less than or equal to term.*/
185 private final int getIndexOffset(Term term) {
186 int lo = 0; // binary search indexTerms[]
187 int hi = indexTerms.length - 1;
// Unsigned shift keeps the midpoint non-negative even if lo + hi overflows int.
190 int mid = (lo + hi) >>> 1;
// delta compares term against the probed index entry.
// NOTE(review): the loop header and the branches that use delta are not visible in this view.
191 int delta = term.compareTo(indexTerms[mid]);
/**
 * Seeks enumerator to the in-memory index entry at indexOffset, passing that entry's
 * file pointer, term and TermInfo along with a position of indexOffset * totalIndexInterval - 1.
 */
202 private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
203 enumerator.seek(indexPointers[indexOffset],
// The (long) cast makes the multiplication happen in 64-bit arithmetic.
204 ((long) indexOffset * totalIndexInterval) - 1,
205 indexTerms[indexOffset], indexInfos[indexOffset]);
208 /** Returns the TermInfo for a Term in the set, or null.
 * Delegates to get(term, false), i.e. without requiring the thread's enum to be seeked. */
209 TermInfo get(Term term) throws IOException {
210 return get(term, false);
213 /** Returns the TermInfo for a Term in the set, or null.
 * Consults termsCache first, then tries to scan forward with the calling thread's enum,
 * and otherwise seeks via the in-memory terms index before scanning.
 * The cache-hit shortcut is only considered when mustSeekEnum is false. */
214 private TermInfo get(Term term, boolean mustSeekEnum) throws IOException {
215 if (size == 0) return null;
// The key copies the Term's field and text (see CloneableTerm).
219 final CloneableTerm cacheKey = new CloneableTerm(term);
221 TermInfoAndOrd tiOrd = termsCache.get(cacheKey);
222 ThreadResources resources = getThreadResources();
// NOTE(review): the body of this branch is not visible in this view.
224 if (!mustSeekEnum && tiOrd != null) {
228 // optimize sequential access: first try scanning cached enum w/o seeking
229 SegmentTermEnum enumerator = resources.termEnum;
230 if (enumerator.term() != null // term is at or past current
231 && ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
232 || term.compareTo(enumerator.term()) >= 0)) {
// Offset of the index entry that follows the block the enum is currently in.
233 int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
234 if (indexTerms.length == enumOffset // but before end of block
235 || term.compareTo(indexTerms[enumOffset]) < 0) {
240 int numScans = enumerator.scanTo(term);
241 if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
242 ti = enumerator.termInfo();
244 // we only want to put this TermInfo into the cache if
245 // scanEnum skipped more than one dictionary entry.
246 // This prevents RangeQueries or WildcardQueries from
247 // wiping out the cache when they iterate over a large number of terms.
250 termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
// Assertions cross-check a fresh lookup against the cached entry.
252 assert sameTermInfo(ti, tiOrd, enumerator);
253 assert (int) enumerator.position == tiOrd.termOrd;
264 // random-access: must seek
// A cached ordinal gives the index block directly, with no binary search.
267 indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
269 // Must do binary search:
270 indexPos = getIndexOffset(term);
273 seekEnum(enumerator, indexPos);
274 enumerator.scanTo(term);
276 if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
277 ti = enumerator.termInfo();
279 // LUCENE-3183: it's possible, if term is Term("",
280 // ""), for the STE to be incorrectly un-positioned
281 // after scan-to; work around this by not caching in that case:
283 if (enumerator.position >= 0) {
284 termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
287 assert sameTermInfo(ti, tiOrd, enumerator);
288 assert enumerator.position == tiOrd.termOrd;
296 // called only from asserts
// Compares docFreq, freqPointer, proxPointer and (when meaningful) skipOffset of ti1 and ti2.
// NOTE(review): the return statements are not visible in this view; presumably any mismatch yields false — confirm.
297 private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
298 if (ti1.docFreq != ti2.docFreq) {
301 if (ti1.freqPointer != ti2.freqPointer) {
304 if (ti1.proxPointer != ti2.proxPointer) {
307 // skipOffset is only valid when docFreq >= skipInterval:
308 if (ti1.docFreq >= enumerator.skipInterval &&
309 ti1.skipOffset != ti2.skipOffset) {
/**
 * Guards operations that need the in-memory terms index.
 *
 * @throws IllegalStateException if indexTerms is null, i.e. the terms index was not loaded
 */
315 private void ensureIndexIsRead() {
316 if (indexTerms == null) {
317 throw new IllegalStateException("terms index was not loaded when this reader was created");
321 /** Returns the position of a Term in the set or -1. */
322 final long getPosition(Term term) throws IOException {
323 if (size == 0) return -1;
// Start from the greatest index entry that is <= term.
326 int indexOffset = getIndexOffset(term);
328 SegmentTermEnum enumerator = getThreadResources().termEnum;
329 seekEnum(enumerator, indexOffset);
// Step forward until the enum reaches or passes term, or runs out of terms.
331 while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
// An exact match reports the enum's current position.
// NOTE(review): the not-found return is not visible in this view.
333 if (term.compareTo(enumerator.term()) == 0)
334 return enumerator.position;
339 /** Returns an enumeration of all the Terms and TermInfos in the set.
 * The result is a clone of origEnum, so each call yields a separate enum object. */
340 public SegmentTermEnum terms() {
341 return (SegmentTermEnum)origEnum.clone();
344 /** Returns an enumeration of terms starting at or after the named term. */
345 public SegmentTermEnum terms(Term term) throws IOException {
// NOTE(review): nothing visible here positions the thread's enum at term before it is cloned
// (original line 346 is missing from this view) — confirm against the full file.
// The caller receives a clone, so it does not share state with this thread's enum.
347 return (SegmentTermEnum)getThreadResources().termEnum.clone();