1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.util.NumericUtils;
22 import org.apache.lucene.util.RamUsageEstimator;
23 import org.apache.lucene.document.NumericField; // for javadocs
24 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
26 import java.io.IOException;
27 import java.io.Serializable;
28 import java.io.PrintStream;
30 import java.text.DecimalFormat;
33 * Expert: Maintains caches of term values.
35 * <p>Created: May 19, 2004 11:13:14 AM
38 * @see org.apache.lucene.util.FieldCacheSanityChecker
40 public interface FieldCache {
42 public static final class CreationPlaceholder {
46 /** Indicator for StringIndex values in the cache. */
47 // NOTE: the value assigned to this constant must not be
48 // the same as any of those in SortField!!
49 public static final int STRING_INDEX = -1;
52 /** Expert: Stores term text values and document ordering data. */
53 public static class StringIndex {
55 public int binarySearchLookup(String key) {
56 // this special case is the reason that Arrays.binarySearch() isn't useful.
61 int high = lookup.length-1;
64 int mid = (low + high) >>> 1;
65 int cmp = lookup[mid].compareTo(key);
72 return mid; // key found
74 return -(low + 1); // key not found.
77 /** All the term values, in natural order. */
78 public final String[] lookup;
80 /** For each document, an index into the lookup array. */
81 public final int[] order;
83 /** Creates one of these objects */
84 public StringIndex (int[] values, String[] lookup) {
91 * Marker interface as super-interface to all parsers. It
92 * is used to specify a custom parser to {@link
93 * SortField#SortField(String, FieldCache.Parser)}.
95 public interface Parser extends Serializable {
98 /** Interface to parse bytes from document fields.
99 * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
101 public interface ByteParser extends Parser {
102 /** Return a single Byte representation of this field's value. */
103 public byte parseByte(String string);
106 /** Interface to parse shorts from document fields.
107 * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
109 public interface ShortParser extends Parser {
110 /** Return a short representation of this field's value. */
111 public short parseShort(String string);
114 /** Interface to parse ints from document fields.
115 * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
117 public interface IntParser extends Parser {
118 /** Return an integer representation of this field's value. */
119 public int parseInt(String string);
122 /** Interface to parse floats from document fields.
123 * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser)
125 public interface FloatParser extends Parser {
126 /** Return a float representation of this field's value. */
127 public float parseFloat(String string);
130 /** Interface to parse long from document fields.
131 * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser)
133 public interface LongParser extends Parser {
134 /** Return a long representation of this field's value. */
135 public long parseLong(String string);
138 /** Interface to parse doubles from document fields.
139 * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser)
141 public interface DoubleParser extends Parser {
142 /** Return a double representation of this field's value. */
143 public double parseDouble(String string);
146 /** Expert: The cache used internally by sorting and range query classes. */
147 public static FieldCache DEFAULT = new FieldCacheImpl();
149 /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
150 public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
151 public byte parseByte(String value) {
152 return Byte.parseByte(value);
154 protected Object readResolve() {
155 return DEFAULT_BYTE_PARSER;
158 public String toString() {
159 return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER";
163 /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
164 public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
165 public short parseShort(String value) {
166 return Short.parseShort(value);
168 protected Object readResolve() {
169 return DEFAULT_SHORT_PARSER;
172 public String toString() {
173 return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER";
177 /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
178 public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
179 public int parseInt(String value) {
180 return Integer.parseInt(value);
182 protected Object readResolve() {
183 return DEFAULT_INT_PARSER;
186 public String toString() {
187 return FieldCache.class.getName()+".DEFAULT_INT_PARSER";
191 /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
192 public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
193 public float parseFloat(String value) {
194 return Float.parseFloat(value);
196 protected Object readResolve() {
197 return DEFAULT_FLOAT_PARSER;
200 public String toString() {
201 return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER";
205 /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
206 public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
207 public long parseLong(String value) {
208 return Long.parseLong(value);
210 protected Object readResolve() {
211 return DEFAULT_LONG_PARSER;
214 public String toString() {
215 return FieldCache.class.getName()+".DEFAULT_LONG_PARSER";
219 /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
220 public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
221 public double parseDouble(String value) {
222 return Double.parseDouble(value);
224 protected Object readResolve() {
225 return DEFAULT_DOUBLE_PARSER;
228 public String toString() {
229 return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER";
234 * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed
235 * via {@link NumericField}/{@link NumericTokenStream}.
237 public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
238 public int parseInt(String val) {
// The first char of the term, taken relative to SHIFT_START_INT, is read as the shift.
239 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
// NOTE(review): a shift in 1..31 presumably marks a reduced-precision prefix term;
// the exception is thrown instead of returning a value, apparently to tell the
// cache filler to stop -- confirm against FieldCacheImpl and NumericUtils.
240 if (shift>0 && shift<=31)
241 throw new FieldCacheImpl.StopFillCacheException();
242 return NumericUtils.prefixCodedToInt(val);
// Parser is Serializable; readResolve maps a deserialized copy back to the shared constant.
244 protected Object readResolve() {
245 return NUMERIC_UTILS_INT_PARSER;
248 public String toString() {
249 return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER";
254 * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
255 * via {@link NumericField}/{@link NumericTokenStream}.
257 public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
258 public float parseFloat(String val) {
// The shift is measured against SHIFT_START_INT, the same base the int parser uses;
// the decoded int is converted to a float on the return line below.
259 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
// NOTE(review): a shift in 1..31 presumably marks a reduced-precision prefix term;
// the exception is thrown instead of returning a value, apparently to tell the
// cache filler to stop -- confirm against FieldCacheImpl and NumericUtils.
260 if (shift>0 && shift<=31)
261 throw new FieldCacheImpl.StopFillCacheException();
262 return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
// Parser is Serializable; readResolve maps a deserialized copy back to the shared constant.
264 protected Object readResolve() {
265 return NUMERIC_UTILS_FLOAT_PARSER;
268 public String toString() {
269 return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER";
274 * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed
275 * via {@link NumericField}/{@link NumericTokenStream}.
277 public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
278 public long parseLong(String val) {
// The first char of the term, taken relative to SHIFT_START_LONG, is read as the shift.
279 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
// NOTE(review): a shift in 1..63 presumably marks a reduced-precision prefix term;
// the exception is thrown instead of returning a value, apparently to tell the
// cache filler to stop -- confirm against FieldCacheImpl and NumericUtils.
280 if (shift>0 && shift<=63)
281 throw new FieldCacheImpl.StopFillCacheException();
282 return NumericUtils.prefixCodedToLong(val);
// Parser is Serializable; readResolve maps a deserialized copy back to the shared constant.
284 protected Object readResolve() {
285 return NUMERIC_UTILS_LONG_PARSER;
288 public String toString() {
289 return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER";
294 * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
295 * via {@link NumericField}/{@link NumericTokenStream}.
297 public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
298 public double parseDouble(String val) {
// The shift is measured against SHIFT_START_LONG, the same base the long parser uses;
// the decoded long is converted to a double on the return line below.
299 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
// NOTE(review): a shift in 1..63 presumably marks a reduced-precision prefix term;
// the exception is thrown instead of returning a value, apparently to tell the
// cache filler to stop -- confirm against FieldCacheImpl and NumericUtils.
300 if (shift>0 && shift<=63)
301 throw new FieldCacheImpl.StopFillCacheException();
302 return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
// Parser is Serializable; readResolve maps a deserialized copy back to the shared constant.
304 protected Object readResolve() {
305 return NUMERIC_UTILS_DOUBLE_PARSER;
308 public String toString() {
309 return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER";
313 /** Checks the internal cache for an appropriate entry, and if none is found,
314 * reads the terms in <code>field</code> and returns a bit set at the size of
315 * <code>reader.maxDoc()</code>, with turned on bits for each docid that
316 * does not have a value for this field.
318 public DocIdSet getUnValuedDocs (IndexReader reader, String field)
321 /** Checks the internal cache for an appropriate entry, and if none is
322 * found, reads the terms in <code>field</code> as a single byte and returns an array
323 * of size <code>reader.maxDoc()</code> of the value each document
324 * has in the given field.
325 * @param reader Used to get field values.
326 * @param field Which field contains the single byte values.
327 * @return The values in the given field for each document.
328 * @throws IOException If any error occurs.
330 public byte[] getBytes (IndexReader reader, String field)
333 /** Checks the internal cache for an appropriate entry, and if none is found,
334 * reads the terms in <code>field</code> as bytes and returns an array of
335 * size <code>reader.maxDoc()</code> of the value each document has in the
337 * @param reader Used to get field values.
338 * @param field Which field contains the bytes.
339 * @param parser Computes byte for string values.
340 * @return The values in the given field for each document.
341 * @throws IOException If any error occurs.
343 public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
346 /** Checks the internal cache for an appropriate entry, and if none is
347 * found, reads the terms in <code>field</code> as shorts and returns an array
348 * of size <code>reader.maxDoc()</code> of the value each document
349 * has in the given field.
350 * @param reader Used to get field values.
351 * @param field Which field contains the shorts.
352 * @return The values in the given field for each document.
353 * @throws IOException If any error occurs.
355 public short[] getShorts (IndexReader reader, String field)
358 /** Checks the internal cache for an appropriate entry, and if none is found,
359 * reads the terms in <code>field</code> as shorts and returns an array of
360 * size <code>reader.maxDoc()</code> of the value each document has in the
362 * @param reader Used to get field values.
363 * @param field Which field contains the shorts.
364 * @param parser Computes short for string values.
365 * @return The values in the given field for each document.
366 * @throws IOException If any error occurs.
368 public short[] getShorts (IndexReader reader, String field, ShortParser parser)
371 /** Checks the internal cache for an appropriate entry, and if none is
372 * found, reads the terms in <code>field</code> as integers and returns an array
373 * of size <code>reader.maxDoc()</code> of the value each document
374 * has in the given field.
375 * @param reader Used to get field values.
376 * @param field Which field contains the integers.
377 * @return The values in the given field for each document.
378 * @throws IOException If any error occurs.
380 public int[] getInts (IndexReader reader, String field)
383 /** Checks the internal cache for an appropriate entry, and if none is found,
384 * reads the terms in <code>field</code> as integers and returns an array of
385 * size <code>reader.maxDoc()</code> of the value each document has in the
387 * @param reader Used to get field values.
388 * @param field Which field contains the integers.
389 * @param parser Computes integer for string values.
390 * @return The values in the given field for each document.
391 * @throws IOException If any error occurs.
393 public int[] getInts (IndexReader reader, String field, IntParser parser)
396 /** Checks the internal cache for an appropriate entry, and if
397 * none is found, reads the terms in <code>field</code> as floats and returns an array
398 * of size <code>reader.maxDoc()</code> of the value each document
399 * has in the given field.
400 * @param reader Used to get field values.
401 * @param field Which field contains the floats.
402 * @return The values in the given field for each document.
403 * @throws IOException If any error occurs.
405 public float[] getFloats (IndexReader reader, String field)
408 /** Checks the internal cache for an appropriate entry, and if
409 * none is found, reads the terms in <code>field</code> as floats and returns an array
410 * of size <code>reader.maxDoc()</code> of the value each document
411 * has in the given field.
412 * @param reader Used to get field values.
413 * @param field Which field contains the floats.
414 * @param parser Computes float for string values.
415 * @return The values in the given field for each document.
416 * @throws IOException If any error occurs.
418 public float[] getFloats (IndexReader reader, String field,
419 FloatParser parser) throws IOException;
422 * Checks the internal cache for an appropriate entry, and if none is
423 * found, reads the terms in <code>field</code> as longs and returns an array
424 * of size <code>reader.maxDoc()</code> of the value each document
425 * has in the given field.
427 * @param reader Used to get field values.
428 * @param field Which field contains the longs.
429 * @return The values in the given field for each document.
430 * @throws java.io.IOException If any error occurs.
432 public long[] getLongs(IndexReader reader, String field)
436 * Checks the internal cache for an appropriate entry, and if none is found,
437 * reads the terms in <code>field</code> as longs and returns an array of
438 * size <code>reader.maxDoc()</code> of the value each document has in the
441 * @param reader Used to get field values.
442 * @param field Which field contains the longs.
443 * @param parser Computes long for string values.
444 * @return The values in the given field for each document.
445 * @throws IOException If any error occurs.
447 public long[] getLongs(IndexReader reader, String field, LongParser parser)
452 * Checks the internal cache for an appropriate entry, and if none is
453 * found, reads the terms in <code>field</code> as doubles and returns an array
454 * of size <code>reader.maxDoc()</code> of the value each document
455 * has in the given field.
457 * @param reader Used to get field values.
458 * @param field Which field contains the doubles.
459 * @return The values in the given field for each document.
460 * @throws IOException If any error occurs.
462 public double[] getDoubles(IndexReader reader, String field)
466 * Checks the internal cache for an appropriate entry, and if none is found,
467 * reads the terms in <code>field</code> as doubles and returns an array of
468 * size <code>reader.maxDoc()</code> of the value each document has in the
471 * @param reader Used to get field values.
472 * @param field Which field contains the doubles.
473 * @param parser Computes double for string values.
474 * @return The values in the given field for each document.
475 * @throws IOException If any error occurs.
477 public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
480 /** Checks the internal cache for an appropriate entry, and if none
481 * is found, reads the term values in <code>field</code> and returns an array
482 * of size <code>reader.maxDoc()</code> containing the value each document
483 * has in the given field.
484 * @param reader Used to get field values.
485 * @param field Which field contains the strings.
486 * @return The values in the given field for each document.
487 * @throws IOException If any error occurs.
489 public String[] getStrings (IndexReader reader, String field)
492 /** Checks the internal cache for an appropriate entry, and if none
493 * is found, reads the term values in <code>field</code> and returns
494 * an array of them in natural order, along with an array telling
495 * which element in the term array each document uses.
496 * @param reader Used to get field values.
497 * @param field Which field contains the strings.
498 * @return Array of terms and index into the array for each document.
499 * @throws IOException If any error occurs.
501 public StringIndex getStringIndex (IndexReader reader, String field)
505 * EXPERT: A unique Identifier/Description for each item in the FieldCache.
506 * Can be useful for logging/debugging.
507 * @lucene.experimental
509 public static abstract class CacheEntry {
510 public abstract Object getReaderKey();
511 public abstract String getFieldName();
512 public abstract Class<?> getCacheType();
513 public abstract Object getCustom();
514 public abstract Object getValue();
515 private String size = null;
516 protected final void setEstimatedSize(String size) {
520 * @see #estimateSize(RamUsageEstimator)
522 public void estimateSize() {
523 estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
526 * Computes (and stores) the estimated size of the cache Value
527 * @see #getEstimatedSize
529 public void estimateSize(RamUsageEstimator ramCalc) {
530 long size = ramCalc.estimateRamUsage(getValue());
531 setEstimatedSize(RamUsageEstimator.humanReadableUnits
532 (size, new DecimalFormat("0.#")));
536 * The most recently estimated size of the value, null unless
537 * estimateSize has been called.
539 public final String getEstimatedSize() {
545 public String toString() {
546 StringBuilder b = new StringBuilder();
547 b.append("'").append(getReaderKey()).append("'=>");
548 b.append("'").append(getFieldName()).append("',");
549 b.append(getCacheType()).append(",").append(getCustom());
550 b.append("=>").append(getValue().getClass().getName()).append("#");
551 b.append(System.identityHashCode(getValue()));
553 String s = getEstimatedSize();
555 b.append(" (size =~ ").append(s).append(')');
564 * EXPERT: Generates an array of CacheEntry objects representing all items
565 * currently in the FieldCache.
567 * NOTE: These CacheEntry objects maintain a strong reference to the
568 * Cached Values. Maintaining references to a CacheEntry after the IndexReader
569 * associated with it has been garbage collected will prevent the Value itself
570 * from being garbage collected when the Cache drops the WeakReference.
572 * @lucene.experimental
574 public abstract CacheEntry[] getCacheEntries();
578 * EXPERT: Instructs the FieldCache to forcibly expunge all entries
579 * from the underlying caches. This is intended only to be used for
580 * test methods as a way to ensure a known base state of the Cache
581 * (without needing to rely on GC to free WeakReferences).
582 * It should not be relied on for "Cache maintenance" in general
585 * @lucene.experimental
587 public abstract void purgeAllCaches();
590 * Expert: drops all cache entries associated with this
591 * reader. NOTE: this reader must precisely match the
592 * reader that the cache entry is keyed on. If you pass a
593 * top-level reader, it usually will have no effect as
594 * Lucene now caches at the segment reader level.
596 public abstract void purge(IndexReader r);
599 * If non-null, FieldCacheImpl will warn whenever
600 * entries are created that are not sane according to
601 * {@link org.apache.lucene.util.FieldCacheSanityChecker}.
603 public void setInfoStream(PrintStream stream);
605 /** counterpart of {@link #setInfoStream(PrintStream)} */
606 public PrintStream getInfoStream();