+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Closeable;
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.DoubleBarrelLRUCache;
-import org.apache.lucene.util.CloseableThreadLocal;
-
-/** This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
- * Directory. Pairs are accessed either by Term or by ordinal position in the
- * set. */
-
-final class TermInfosReader implements Closeable {
- private final Directory directory; // directory the terms files are opened from
- private final String segment; // segment name used to build the terms file names
- private final FieldInfos fieldInfos; // passed to every SegmentTermEnum this reader creates
-
- private final CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>(); // per-thread enum holder; closed in close()
- private final SegmentTermEnum origEnum; // enum over the terms dictionary file; terms() returns clones of it
- private final long size; // copied from origEnum.size in the constructor; returned by size()
-
- private final Term[] indexTerms; // in-memory terms index entries; null when indexDivisor == -1
- private final TermInfo[] indexInfos; // TermInfo for the indexTerms entry at the same slot; null when the index is not loaded
- private final long[] indexPointers; // indexEnum.indexPointer for the indexTerms entry at the same slot; null when the index is not loaded
-
- private final int totalIndexInterval; // origEnum.indexInterval * indexDivisor, or -1 when the index is not loaded
-
- private final static int DEFAULT_CACHE_SIZE = 1024; // argument passed to the termsCache DoubleBarrelLRUCache constructor
-
- /** A TermInfo that additionally carries the ordinal of its term. */
- private static final class TermInfoAndOrd extends TermInfo {
-   final long termOrd;
-
-   public TermInfoAndOrd(TermInfo ti, long termOrd) {
-     super(ti);
-     assert termOrd >= 0;
-     this.termOrd = termOrd;
-   }
- }
-
- /**
-  * Cache key wrapping a Term. The constructor builds a new Term from the
-  * source term's field and text, so the key does not hold on to the
-  * caller's Term instance.
-  */
- private static class CloneableTerm extends DoubleBarrelLRUCache.CloneableKey {
-   private final Term term;
-
-   public CloneableTerm(Term t) {
-     this.term = new Term(t.field(), t.text());
-   }
-
-   @Override
-   public Object clone() {
-     return new CloneableTerm(term);
-   }
-
-   /**
-    * Two keys are equal when their wrapped terms are equal. Returns false,
-    * rather than throwing, for null or for an object of another type.
-    */
-   @Override
-   public boolean equals(Object _other) {
-     if (this == _other) {
-       return true;
-     }
-     if (!(_other instanceof CloneableTerm)) {
-       // Also covers null: instanceof is false for a null reference.
-       return false;
-     }
-     return term.equals(((CloneableTerm) _other).term);
-   }
-
-   @Override
-   public int hashCode() {
-     return term.hashCode();
-   }
- }
-
- private final DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(DEFAULT_CACHE_SIZE); // per-reader (not per-thread) cache from term to TermInfo plus ordinal; read and filled in get(Term, boolean)
-
- /**
- * Per-thread resources managed by ThreadLocal.
- * An instance is created by getThreadResources() the first time a thread
- * asks for one and is then stored in the threadResources field.
- */
- private static final class ThreadResources {
- SegmentTermEnum termEnum; // set by getThreadResources() to the enum returned by terms()
- }
-
- /**
-  * Opens the terms dictionary of a segment and, unless indexDivisor is -1,
-  * loads every indexDivisor'th entry of the terms index into memory.
-  * If anything fails part-way, whatever was already opened is closed
-  * before the exception propagates.
-  *
-  * @param dir directory the terms files are opened from
-  * @param seg segment name used to build the file names
-  * @param fis field infos handed to the term enums
-  * @param readBufferSize buffer size passed to Directory.openInput
-  * @param indexDivisor -1 to skip loading the terms index, otherwise at least 1
-  * @throws IllegalArgumentException if indexDivisor is 0 or smaller than -1
-  */
- TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
-     throws CorruptIndexException, IOException {
-   if (indexDivisor != -1 && indexDivisor < 1) {
-     throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
-   }
-
-   boolean success = false;
-   try {
-     directory = dir;
-     segment = seg;
-     fieldInfos = fis;
-
-     final String dictFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION);
-     origEnum = new SegmentTermEnum(directory.openInput(dictFileName, readBufferSize), fieldInfos, false);
-     size = origEnum.size;
-
-     if (indexDivisor == -1) {
-       // Do not load terms index: every index structure stays unset.
-       totalIndexInterval = -1;
-       indexTerms = null;
-       indexInfos = null;
-       indexPointers = null;
-     } else {
-       // Load terms index, keeping one entry out of every indexDivisor.
-       totalIndexInterval = origEnum.indexInterval * indexDivisor;
-       final String indexFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
-       final SegmentTermEnum indexEnum = new SegmentTermEnum(directory.openInput(indexFileName, readBufferSize), fieldInfos, true);
-
-       try {
-         final int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor;
-
-         indexTerms = new Term[indexSize];
-         indexInfos = new TermInfo[indexSize];
-         indexPointers = new long[indexSize];
-
-         int slot = 0;
-         while (indexEnum.next()) {
-           indexTerms[slot] = indexEnum.term();
-           indexInfos[slot] = indexEnum.termInfo();
-           indexPointers[slot] = indexEnum.indexPointer;
-           slot++;
-
-           // Step over the entries that the divisor tells us to drop;
-           // stop early if the index runs out.
-           int consumed = 1;
-           while (consumed < indexDivisor && indexEnum.next()) {
-             consumed++;
-           }
-         }
-       } finally {
-         indexEnum.close();
-       }
-     }
-     success = true;
-   } finally {
-     // With lock-less commits, it's entirely possible (and
-     // fine) to hit a FileNotFound exception above. In
-     // this case, we want to explicitly close any subset
-     // of things that were opened so that we don't have to
-     // wait for a GC to do so.
-     if (!success) {
-       close();
-     }
-   }
- }
-
- public int getSkipInterval() { // exposes the skipInterval field of the dictionary enum (origEnum)
- return origEnum.skipInterval;
- }
-
- public int getMaxSkipLevels() { // exposes the maxSkipLevels field of the dictionary enum (origEnum)
- return origEnum.maxSkipLevels;
- }
-
- /**
-  * Closes the dictionary enum (if it was opened) and the per-thread enum
-  * holder. The per-thread holder is closed even when closing the
-  * dictionary enum throws; that exception still propagates to the caller.
-  *
-  * @throws IOException if closing the dictionary enum fails
-  */
- public final void close() throws IOException {
-   try {
-     // origEnum can still be null when the constructor calls close() after
-     // failing before the dictionary enum was created.
-     if (origEnum != null) {
-       origEnum.close();
-     }
-   } finally {
-     threadResources.close();
-   }
- }
-
- /** Returns the number of term/value pairs in the set; this is the value copied from origEnum.size by the constructor. */
- final long size() {
- return size;
- }
-
- private ThreadResources getThreadResources() {
- ThreadResources resources = threadResources.get();
- if (resources == null) {
- resources = new ThreadResources();
- resources.termEnum = terms();
- threadResources.set(resources);
- }
- return resources;
- }
-
-
- /** Returns the offset of the greatest index entry which is less than or equal to term.*/
- private final int getIndexOffset(Term term) {
- int lo = 0; // binary search indexTerms[]
- int hi = indexTerms.length - 1;
-
- while (hi >= lo) {
- int mid = (lo + hi) >>> 1;
- int delta = term.compareTo(indexTerms[mid]);
- if (delta < 0)
- hi = mid - 1;
- else if (delta > 0)
- lo = mid + 1;
- else
- return mid;
- }
- return hi;
- }
-
- /**
-  * Seeks enumerator to the in-memory index entry at indexOffset, passing
-  * the entry's file pointer, term and TermInfo from the index arrays.
-  */
- private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
-   final long pointer = indexPointers[indexOffset];
-   // Multiply in long arithmetic so a large offset cannot overflow int.
-   final long ordinal = ((long) indexOffset * totalIndexInterval) - 1;
-   enumerator.seek(pointer, ordinal, indexTerms[indexOffset], indexInfos[indexOffset]);
- }
-
- /** Returns the TermInfo for a Term in the set, or null. Delegates to get(term, false), so a cached TermInfo may be returned directly. */
- TermInfo get(Term term) throws IOException {
- return get(term, false);
- }
-
- /**
-  * Returns the TermInfo for a Term in the set, or null.
-  *
-  * The lookup first consults termsCache, then tries to reach the term by
-  * scanning forward from the calling thread's current enum position, and
-  * only falls back to seeking via the in-memory terms index when the scan
-  * shortcut does not apply.
-  *
-  * @param term the term to look up
-  * @param mustSeekEnum when true a cache hit is not returned directly, so
-  *        the calling thread's enum is always positioned by scan or seek
-  * @return the TermInfo for term, or null when size is 0 or the enum does
-  *         not end up on term
-  * @throws IllegalStateException if the terms index was not loaded
-  * @throws IOException if reading from the enum fails
-  */
- private TermInfo get(Term term, boolean mustSeekEnum) throws IOException {
-   if (size == 0) {
-     return null;
-   }
-
-   ensureIndexIsRead();
-
-   final CloneableTerm cacheKey = new CloneableTerm(term);
-
-   TermInfoAndOrd tiOrd = termsCache.get(cacheKey);
-   ThreadResources resources = getThreadResources();
-
-   if (!mustSeekEnum && tiOrd != null) {
-     return tiOrd;
-   }
-
-   // optimize sequential access: first try scanning cached enum w/o seeking
-   SegmentTermEnum enumerator = resources.termEnum;
-   if (enumerator.term() != null // term is at or past current
-       && ((enumerator.prev() != null && term.compareTo(enumerator.prev()) > 0)
-           || term.compareTo(enumerator.term()) >= 0)) {
-     int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
-     if (indexTerms.length == enumOffset // but before end of block
-         || term.compareTo(indexTerms[enumOffset]) < 0) {
-       // no need to seek
-
-       final TermInfo ti;
-
-       int numScans = enumerator.scanTo(term);
-       if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
-         ti = enumerator.termInfo();
-         if (numScans > 1) {
-           // Only put this TermInfo into the cache if the scan skipped
-           // more than one dictionary entry. This keeps RangeQueries or
-           // WildcardQueries from wiping out the cache when they iterate
-           // over a large number of terms in order.
-           if (tiOrd == null) {
-             termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
-           } else {
-             assert sameTermInfo(ti, tiOrd, enumerator);
-             // Compare the ordinals without narrowing to int: a
-             // truncating cast could make this check fail spuriously for
-             // ordinals beyond int range. Matches the random-access
-             // path below.
-             assert enumerator.position == tiOrd.termOrd;
-           }
-         }
-       } else {
-         ti = null;
-       }
-
-       return ti;
-     }
-   }
-
-   // random-access: must seek
-   final int indexPos;
-   if (tiOrd != null) {
-     // The cached ordinal tells us which index block holds the term.
-     indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
-   } else {
-     // Must do binary search:
-     indexPos = getIndexOffset(term);
-   }
-
-   seekEnum(enumerator, indexPos);
-   enumerator.scanTo(term);
-   final TermInfo ti;
-   if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
-     ti = enumerator.termInfo();
-     if (tiOrd == null) {
-       // LUCENE-3183: it's possible, if term is Term("",
-       // ""), for the STE to be incorrectly un-positioned
-       // after scan-to; work around this by not caching in
-       // this case:
-       if (enumerator.position >= 0) {
-         termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
-       }
-     } else {
-       assert sameTermInfo(ti, tiOrd, enumerator);
-       assert enumerator.position == tiOrd.termOrd;
-     }
-   } else {
-     ti = null;
-   }
-   return ti;
- }
-
- /**
-  * Consistency check, called only from asserts: reports whether two
-  * TermInfos agree on docFreq, freqPointer and proxPointer, and on
-  * skipOffset whenever docFreq is at least the enumerator's skipInterval.
-  */
- private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
-   final boolean postingsAgree = ti1.docFreq == ti2.docFreq
-       && ti1.freqPointer == ti2.freqPointer
-       && ti1.proxPointer == ti2.proxPointer;
-   if (!postingsAgree) {
-     return false;
-   }
-   // skipOffset is only valid when docFreq >= skipInterval:
-   final boolean skipOffsetValid = ti1.docFreq >= enumerator.skipInterval;
-   return !skipOffsetValid || ti1.skipOffset == ti2.skipOffset;
- }
-
- private void ensureIndexIsRead() {
- if (indexTerms == null) {
- throw new IllegalStateException("terms index was not loaded when this reader was created");
- }
- }
-
- /** Returns the position of a Term in the set or -1. */
- final long getPosition(Term term) throws IOException {
- if (size == 0) return -1;
-
- ensureIndexIsRead();
- int indexOffset = getIndexOffset(term);
-
- SegmentTermEnum enumerator = getThreadResources().termEnum;
- seekEnum(enumerator, indexOffset);
-
- while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
-
- if (term.compareTo(enumerator.term()) == 0)
- return enumerator.position;
- else
- return -1;
- }
-
- /** Returns an enumeration of all the Terms and TermInfos in the set. The result is a clone of origEnum. */
- public SegmentTermEnum terms() {
- return (SegmentTermEnum)origEnum.clone();
- }
-
- /**
-  * Returns an enumeration of terms starting at or after the named term.
-  * The calling thread's enum is first run through get(term, true) and the
-  * result is a clone of that enum.
-  */
- public SegmentTermEnum terms(Term term) throws IOException {
-   get(term, true);
-   final SegmentTermEnum threadEnum = getThreadResources().termEnum;
-   return (SegmentTermEnum) threadEnum.clone();
- }
-}