X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/FieldCache.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/FieldCache.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/FieldCache.java new file mode 100644 index 0000000..52db4d4 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/FieldCache.java @@ -0,0 +1,700 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.document.NumericField; // for javadocs +import org.apache.lucene.analysis.NumericTokenStream; // for javadocs + +import java.io.IOException; +import java.io.Serializable; +import java.io.PrintStream; + +import java.text.DecimalFormat; + +/** + * Expert: Maintains caches of term values. + * + *

Created: May 19, 2004 11:13:14 AM + * + * @since lucene 1.4 + * @see org.apache.lucene.util.FieldCacheSanityChecker + */ +public interface FieldCache { + + /** Holder with a single package-private value slot. NOTE(review): presumably a stand-in kept in the cache while the real value is being created - confirm in FieldCacheImpl. */ + public static final class CreationPlaceholder { + Object value; + } + + /** Indicator for StringIndex values in the cache. */ + // NOTE: the value assigned to this constant must not be + // the same as any of those in SortField!! + public static final int STRING_INDEX = -1; + + + /** Expert: Stores term text values and document ordering data. */ + public static class StringIndex { + + /** + * Binary-searches {@link #lookup} for the given key. A null key maps to index 0; + * any other key is searched for in slots 1..lookup.length-1 only. + * @param key the term to look up, may be null + * @return the index of the key in lookup if found, otherwise -(low + 1) where + * low is the slot at which the key would have to be inserted + */ + public int binarySearchLookup(String key) { + // this special case is the reason that Arrays.binarySearch() isn't useful. + if (key == null) + return 0; + + int low = 1; + int high = lookup.length-1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int cmp = lookup[mid].compareTo(key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + + /** All the term values, in natural order. */ + public final String[] lookup; + + /** For each document, an index into the lookup array. */ + public final int[] order; + + /** Creates a StringIndex that stores the given arrays by reference: values becomes {@link #order}, lookup becomes {@link #lookup}. */ + public StringIndex (int[] values, String[] lookup) { + this.order = values; + this.lookup = lookup; + } + } + + /** + * Marker interface as super-interface to all parsers. It + * is used to specify a custom parser to {@link + * SortField#SortField(String, FieldCache.Parser)}. + */ + public interface Parser extends Serializable { + } + + /** Interface to parse bytes from document fields. + * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser) + */ + public interface ByteParser extends Parser { + /** Return a single Byte representation of this field's value. */ + public byte parseByte(String string); + } + + /** Interface to parse shorts from document fields. 
+ * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser) + */ + public interface ShortParser extends Parser { + /** Return a short representation of this field's value. */ + public short parseShort(String string); + } + + /** Interface to parse ints from document fields. + * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser) + */ + public interface IntParser extends Parser { + /** Return an integer representation of this field's value. */ + public int parseInt(String string); + } + + /** Interface to parse floats from document fields. + * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser) + */ + public interface FloatParser extends Parser { + /** Return a float representation of this field's value. */ + public float parseFloat(String string); + } + + /** Interface to parse longs from document fields. + * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser) + */ + public interface LongParser extends Parser { + /** Return a long representation of this field's value. */ + public long parseLong(String string); + } + + /** Interface to parse doubles from document fields. + * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser) + */ + public interface DoubleParser extends Parser { + /** Return a double representation of this field's value. */ + public double parseDouble(String string); + } + + /** Expert: The cache used internally by sorting and range query classes. 
*/ + public static FieldCache DEFAULT = new FieldCacheImpl(); + + /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */ + public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() { + public byte parseByte(String value) { + return Byte.parseByte(value); + } + /* readResolve is the Java serialization hook: it returns the shared constant in place of a deserialized copy. Every parser constant below repeats this pattern. */ + protected Object readResolve() { + return DEFAULT_BYTE_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; + } + }; + + /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ + public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() { + public short parseShort(String value) { + return Short.parseShort(value); + } + protected Object readResolve() { + return DEFAULT_SHORT_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; + } + }; + + /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ + public static final IntParser DEFAULT_INT_PARSER = new IntParser() { + public int parseInt(String value) { + return Integer.parseInt(value); + } + protected Object readResolve() { + return DEFAULT_INT_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; + } + }; + + /** The default parser for float values, which are encoded by {@link Float#toString(float)} */ + public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() { + public float parseFloat(String value) { + return Float.parseFloat(value); + } + protected Object readResolve() { + return DEFAULT_FLOAT_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; + } + }; + + /** The default parser for long values, which are encoded by {@link Long#toString(long)} */ + public static final LongParser DEFAULT_LONG_PARSER = new LongParser() { + public long parseLong(String value) { + 
return Long.parseLong(value); + } + protected Object readResolve() { + return DEFAULT_LONG_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; + } + }; + + /** The default parser for double values, which are encoded by {@link Double#toString(double)} */ + public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() { + public double parseDouble(String value) { + return Double.parseDouble(value); + } + protected Object readResolve() { + return DEFAULT_DOUBLE_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; + } + }; + + /** + * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed + * via {@link NumericField}/{@link NumericTokenStream}. + */ + public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){ + public int parseInt(String val) { + final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT; + /* A shift of 1..31 throws StopFillCacheException instead of parsing the term. NOTE(review): presumably such terms are lower-precision prefix terms - confirm against NumericUtils. */ + if (shift>0 && shift<=31) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.prefixCodedToInt(val); + } + protected Object readResolve() { + return NUMERIC_UTILS_INT_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; + } + }; + + /** + * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed + * via {@link NumericField}/{@link NumericTokenStream}. 
+ */ + public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){ + public float parseFloat(String val) { + final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT; + /* A shift of 1..31 throws StopFillCacheException instead of parsing the term. NOTE(review): presumably such terms are lower-precision prefix terms - confirm against NumericUtils. */ + if (shift>0 && shift<=31) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val)); + } + protected Object readResolve() { + return NUMERIC_UTILS_FLOAT_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; + } + }; + + /** + * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed + * via {@link NumericField}/{@link NumericTokenStream}. + */ + public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){ + public long parseLong(String val) { + final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG; + /* A shift of 1..63 throws StopFillCacheException instead of parsing the term. NOTE(review): presumably such terms are lower-precision prefix terms - confirm against NumericUtils. */ + if (shift>0 && shift<=63) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.prefixCodedToLong(val); + } + protected Object readResolve() { + return NUMERIC_UTILS_LONG_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; + } + }; + + /** + * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed + * via {@link NumericField}/{@link NumericTokenStream}. 
+ */ + public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){ + public double parseDouble(String val) { + final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG; + /* A shift of 1..63 throws StopFillCacheException instead of parsing the term. NOTE(review): presumably such terms are lower-precision prefix terms - confirm against NumericUtils. */ + if (shift>0 && shift<=63) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val)); + } + protected Object readResolve() { + return NUMERIC_UTILS_DOUBLE_PARSER; + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; + } + }; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field and returns a bit set the size of + * reader.maxDoc(), with a bit turned on for each docid that + * has a value for this field. + * @param reader Used to get field values. + * @param field Which field to check for values. + * @return A bit per document, turned on when the document has a value in the field. + * @throws IOException If any error occurs. + */ + public Bits getDocsWithField(IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as a single byte and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the single byte values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public byte[] getBytes (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as bytes and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the bytes. + * @param parser Computes byte for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public byte[] getBytes (IndexReader reader, String field, ByteParser parser) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as bytes and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the bytes. + * @param parser Computes byte for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public byte[] getBytes (IndexReader reader, String field, ByteParser parser, boolean setDocsWithField) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as shorts and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the shorts. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public short[] getShorts (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as shorts and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the shorts. + * @param parser Computes short for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public short[] getShorts (IndexReader reader, String field, ShortParser parser) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as shorts and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the shorts. + * @param parser Computes short for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public short[] getShorts (IndexReader reader, String field, ShortParser parser, boolean setDocsWithField) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as integers and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the integers. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public int[] getInts (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as integers and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the integers. + * @param parser Computes integer for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public int[] getInts (IndexReader reader, String field, IntParser parser) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as integers and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * @param reader Used to get field values. + * @param field Which field contains the integers. + * @param parser Computes integer for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public int[] getInts (IndexReader reader, String field, IntParser parser, boolean setDocsWithField) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the floats. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public float[] getFloats (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the floats. + * @param parser Computes float for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public float[] getFloats (IndexReader reader, String field, + FloatParser parser) throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the floats. + * @param parser Computes float for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public float[] getFloats (IndexReader reader, String field, + FloatParser parser, boolean setDocsWithField) throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as longs and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param reader Used to get field values. + * @param field Which field contains the longs. + * @return The values in the given field for each document. + * @throws java.io.IOException If any error occurs. + */ + public long[] getLongs(IndexReader reader, String field) + throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as longs and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param reader Used to get field values. + * @param field Which field contains the longs. + * @param parser Computes long for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public long[] getLongs(IndexReader reader, String field, LongParser parser) + throws IOException; + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as longs and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param reader Used to get field values. + * @param field Which field contains the longs. + * @param parser Computes long for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public long[] getLongs(IndexReader reader, String field, LongParser parser, boolean setDocsWithField) + throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as doubles and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param reader Used to get field values. + * @param field Which field contains the doubles. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public double[] getDoubles(IndexReader reader, String field) + throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as doubles and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param reader Used to get field values. + * @param field Which field contains the doubles. + * @param parser Computes double for string values. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public double[] getDoubles(IndexReader reader, String field, DoubleParser parser) + throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field as doubles and returns an array of + * size reader.maxDoc() of the value each document has in the + * given field. + * + * @param reader Used to get field values. + * @param field Which field contains the doubles. + * @param parser Computes double for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public double[] getDoubles(IndexReader reader, String field, DoubleParser parser, boolean setDocsWithField) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found, reads the term values in field and returns an array + * of size reader.maxDoc() containing the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public String[] getStrings (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found reads the term values in field and returns + * an array of them in natural order, along with an array telling + * which element in the term array each document uses. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @return Array of terms and index into the array for each document. + * @throws IOException If any error occurs. 
+ */ + public StringIndex getStringIndex (IndexReader reader, String field) + throws IOException; + + /** + * EXPERT: A unique Identifier/Description for each item in the FieldCache. + * Can be useful for logging/debugging. + * @lucene.experimental + */ + public static abstract class CacheEntry { + public abstract Object getReaderKey(); + public abstract String getFieldName(); + public abstract Class<?> getCacheType(); + public abstract Object getCustom(); + public abstract Object getValue(); + private String size = null; + /** Stores the size string that {@link #getEstimatedSize} returns afterwards. */ + protected final void setEstimatedSize(String size) { + this.size = size; + } + /** + * Estimates the size using a new RamUsageEstimator(false). + * @see #estimateSize(RamUsageEstimator) + */ + public void estimateSize() { + estimateSize(new RamUsageEstimator(false)); // doesn't check for interned + } + /** + * Computes (and stores) the estimated size of the cache Value + * @param ramCalc estimator applied to {@link #getValue()} + * @see #getEstimatedSize + */ + public void estimateSize(RamUsageEstimator ramCalc) { + long bytes = ramCalc.estimateRamUsage(getValue()); + setEstimatedSize(RamUsageEstimator.humanReadableUnits + (bytes, new DecimalFormat("0.#"))); + + } + /** + * The most recently estimated size of the value, null unless + * estimateSize has been called. + */ + public final String getEstimatedSize() { + return size; + } + + + /** + * Renders the entry as 'readerKey'=>'fieldName',cacheType,custom=>valueClassName#identityHash, + * followed by " (size =~ N)" once a size estimate has been stored. A null value is + * rendered as "null#0" instead of throwing a NullPointerException. + */ + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + b.append("'").append(getReaderKey()).append("'=>"); + b.append("'").append(getFieldName()).append("',"); + b.append(getCacheType()).append(",").append(getCustom()); + /* read the value once so the class name and the identity hash describe the same object */ + final Object value = getValue(); + b.append("=>").append(value == null ? "null" : value.getClass().getName()).append("#"); + b.append(System.identityHashCode(value)); + + String s = getEstimatedSize(); + if(null != s) { + b.append(" (size =~ ").append(s).append(')'); + } + + return b.toString(); + } + + } + + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *

+ * NOTE: These CacheEntry objects maintain a strong reference to the + * Cached Values. Maintaining references to a CacheEntry after the IndexReader + * associated with it has been garbage collected will prevent the Value itself + * from being garbage collected when the Cache drops the WeakReference. + *

+ * @lucene.experimental + */ + public abstract CacheEntry[] getCacheEntries(); + + /** + *

+ * EXPERT: Instructs the FieldCache to forcibly expunge all entries + * from the underlying caches. This is intended only to be used for + * test methods as a way to ensure a known base state of the Cache + * (without needing to rely on GC to free WeakReferences). + * It should not be relied on for "Cache maintenance" in general + * application code. + *

+ * @lucene.experimental + */ + public abstract void purgeAllCaches(); + + /** + * Expert: drops all cache entries associated with this + * reader. NOTE: this reader must precisely match the + * reader that the cache entry is keyed on. If you pass a + * top-level reader, it usually will have no effect as + * Lucene now caches at the segment reader level. + * @param r the reader whose cache entries should be dropped + */ + public abstract void purge(IndexReader r); + + /** + * If non-null, FieldCacheImpl will warn whenever + * entries are created that are not sane according to + * {@link org.apache.lucene.util.FieldCacheSanityChecker}. + * @param stream destination for the warnings, may be null + */ + public void setInfoStream(PrintStream stream); + + /** Counterpart of {@link #setInfoStream(PrintStream)}. + * @return the info stream. NOTE(review): presumably the value last passed to setInfoStream - confirm in FieldCacheImpl. + */ + public PrintStream getInfoStream(); +}