X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java deleted file mode 100644 index 71e23a9..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java +++ /dev/null @@ -1,173 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.index.TermDocs; // for javadocs - -/** - * A {@link Filter} that only accepts documents whose single - * term value in the specified field is contained in the - * provided set of allowed terms. - * - *

- * - * This is the same functionality as TermsFilter (from - * contrib/queries), except this filter requires that the - * field contains only a single term for all documents. - * Because of drastically different implementations, they - * also have different performance characteristics, as - * described below. - * - *

- * - * The first invocation of this filter on a given field will - * be slower, since a {@link FieldCache.StringIndex} must be - * created. Subsequent invocations using the same field - * will re-use this cache. However, as with all - * functionality based on {@link FieldCache}, persistent RAM - * is consumed to hold the cache, and is not freed until the - * {@link IndexReader} is closed. In contrast, TermsFilter - * has no persistent RAM consumption. - * - * - *

- * - * With each search, this filter translates the specified - * set of Terms into a private {@link FixedBitSet} keyed by - * term number per unique {@link IndexReader} (normally one - * reader per segment). Then, during matching, the term - * number for each docID is retrieved from the cache and - * then checked for inclusion using the {@link FixedBitSet}. - * Since all testing is done using RAM resident data - * structures, performance should be very fast, most likely - * fast enough to not require further caching of the - * DocIdSet for each possible combination of terms. - * However, because docIDs are simply scanned linearly, an - * index with a great many small documents may find this - * linear scan too costly. - * - *

- * - * In contrast, TermsFilter builds up an {@link FixedBitSet}, - * keyed by docID, every time it's created, by enumerating - * through all matching docs using {@link TermDocs} to seek - * and scan through each term's docID list. While there is - * no linear scan of all docIDs, besides the allocation of - * the underlying array in the {@link FixedBitSet}, this - * approach requires a number of "disk seeks" in proportion - * to the number of terms, which can be exceptionally costly - * when there are cache misses in the OS's IO cache. - * - *

- * - * Generally, this filter will be slower on the first - * invocation for a given field, but subsequent invocations, - * even if you change the allowed set of Terms, should be - * faster than TermsFilter, especially as the number of - * Terms being matched increases. If you are matching only - * a very small number of terms, and those terms in turn - * match a very small number of documents, TermsFilter may - * perform faster. - * - *

- * - * Which filter is best is very application dependent. - */ - -public class FieldCacheTermsFilter extends Filter { - private String field; - private String[] terms; - - public FieldCacheTermsFilter(String field, String... terms) { - this.field = field; - this.terms = terms; - } - - public FieldCache getFieldCache() { - return FieldCache.DEFAULT; - } - - @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return new FieldCacheTermsFilterDocIdSet(getFieldCache().getStringIndex(reader, field)); - } - - protected class FieldCacheTermsFilterDocIdSet extends DocIdSet { - private FieldCache.StringIndex fcsi; - - private FixedBitSet bits; - - public FieldCacheTermsFilterDocIdSet(FieldCache.StringIndex fcsi) { - this.fcsi = fcsi; - bits = new FixedBitSet(this.fcsi.lookup.length); - for (int i=0;i 0) { - bits.set(termNumber); - } - } - } - - @Override - public DocIdSetIterator iterator() { - return new FieldCacheTermsFilterDocIdSetIterator(); - } - - /** This DocIdSet implementation is cacheable. */ - @Override - public boolean isCacheable() { - return true; - } - - protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator { - private int doc = -1; - - @Override - public int docID() { - return doc; - } - - @Override - public int nextDoc() { - try { - while (!bits.get(fcsi.order[++doc])) {} - } catch (ArrayIndexOutOfBoundsException e) { - doc = NO_MORE_DOCS; - } - return doc; - } - - @Override - public int advance(int target) { - try { - doc = target; - while (!bits.get(fcsi.order[doc])) { - doc++; - } - } catch (ArrayIndexOutOfBoundsException e) { - doc = NO_MORE_DOCS; - } - return doc; - } - } - } -}