1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.index.IndexReader;
21 import org.apache.lucene.util.Bits;
22 import org.apache.lucene.util.NumericUtils;
23 import org.apache.lucene.util.RamUsageEstimator;
24 import org.apache.lucene.document.NumericField; // for javadocs
25 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
27 import java.io.IOException;
28 import java.io.Serializable;
29 import java.io.PrintStream;
31 import java.text.DecimalFormat;
34 * Expert: Maintains caches of term values.
36 * <p>Created: May 19, 2004 11:13:14 AM
39 * @see org.apache.lucene.util.FieldCacheSanityChecker
41 public interface FieldCache {
43 public static final class CreationPlaceholder {
47 /** Indicator for StringIndex values in the cache. */
48 // NOTE: the value assigned to this constant must not be
49 // the same as any of those in SortField!!
50 public static final int STRING_INDEX = -1;
53 /** Expert: Stores term text values and document ordering data. */
54 public static class StringIndex {
56 public int binarySearchLookup(String key) {
57 // this special case is the reason that Arrays.binarySearch() isn't useful.
62 int high = lookup.length-1;
65 int mid = (low + high) >>> 1;
66 int cmp = lookup[mid].compareTo(key);
73 return mid; // key found
75 return -(low + 1); // key not found.
78 /** All the term values, in natural order. */
79 public final String[] lookup;
81 /** For each document, an index into the lookup array. */
82 public final int[] order;
84 /** Creates one of these objects */
85 public StringIndex (int[] values, String[] lookup) {
92 * Marker interface as super-interface to all parsers. It
93 * is used to specify a custom parser to {@link
94 * SortField#SortField(String, FieldCache.Parser)}.
96 public interface Parser extends Serializable {
99 /** Interface to parse bytes from document fields.
100 * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
102 public interface ByteParser extends Parser {
103 /** Return a single Byte representation of this field's value. */
104 public byte parseByte(String string);
107 /** Interface to parse shorts from document fields.
108 * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
110 public interface ShortParser extends Parser {
111 /** Return a short representation of this field's value. */
112 public short parseShort(String string);
115 /** Interface to parse ints from document fields.
116 * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
118 public interface IntParser extends Parser {
119 /** Return an integer representation of this field's value. */
120 public int parseInt(String string);
123 /** Interface to parse floats from document fields.
124 * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser)
126 public interface FloatParser extends Parser {
127 /** Return a float representation of this field's value. */
128 public float parseFloat(String string);
131 /** Interface to parse longs from document fields.
132 * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser)
134 public interface LongParser extends Parser {
135 /** Return a long representation of this field's value. */
136 public long parseLong(String string);
139 /** Interface to parse doubles from document fields.
140 * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser)
142 public interface DoubleParser extends Parser {
143 /** Return a double representation of this field's value. */
144 public double parseDouble(String string);
147 /** Expert: The cache used internally by sorting and range query classes. */
148 public static FieldCache DEFAULT = new FieldCacheImpl();
150 /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
151 public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
152 public byte parseByte(String value) {
153 return Byte.parseByte(value);
155 protected Object readResolve() {
156 return DEFAULT_BYTE_PARSER;
159 public String toString() {
160 return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER";
164 /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
165 public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
166 public short parseShort(String value) {
167 return Short.parseShort(value);
169 protected Object readResolve() {
170 return DEFAULT_SHORT_PARSER;
173 public String toString() {
174 return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER";
178 /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
179 public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
180 public int parseInt(String value) {
181 return Integer.parseInt(value);
183 protected Object readResolve() {
184 return DEFAULT_INT_PARSER;
187 public String toString() {
188 return FieldCache.class.getName()+".DEFAULT_INT_PARSER";
192 /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
193 public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
194 public float parseFloat(String value) {
195 return Float.parseFloat(value);
197 protected Object readResolve() {
198 return DEFAULT_FLOAT_PARSER;
201 public String toString() {
202 return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER";
206 /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
207 public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
208 public long parseLong(String value) {
209 return Long.parseLong(value);
211 protected Object readResolve() {
212 return DEFAULT_LONG_PARSER;
215 public String toString() {
216 return FieldCache.class.getName()+".DEFAULT_LONG_PARSER";
220 /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
221 public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
222 public double parseDouble(String value) {
223 return Double.parseDouble(value);
225 protected Object readResolve() {
226 return DEFAULT_DOUBLE_PARSER;
229 public String toString() {
230 return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER";
235 * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed
236 * via {@link NumericField}/{@link NumericTokenStream}.
238 public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
239 public int parseInt(String val) {
240 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
241 if (shift>0 && shift<=31)
242 throw new FieldCacheImpl.StopFillCacheException();
243 return NumericUtils.prefixCodedToInt(val);
245 protected Object readResolve() {
246 return NUMERIC_UTILS_INT_PARSER;
249 public String toString() {
250 return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER";
255 * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
256 * via {@link NumericField}/{@link NumericTokenStream}.
258 public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
259 public float parseFloat(String val) {
260 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
261 if (shift>0 && shift<=31)
262 throw new FieldCacheImpl.StopFillCacheException();
263 return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
265 protected Object readResolve() {
266 return NUMERIC_UTILS_FLOAT_PARSER;
269 public String toString() {
270 return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER";
275 * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed
276 * via {@link NumericField}/{@link NumericTokenStream}.
278 public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
279 public long parseLong(String val) {
280 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
281 if (shift>0 && shift<=63)
282 throw new FieldCacheImpl.StopFillCacheException();
283 return NumericUtils.prefixCodedToLong(val);
285 protected Object readResolve() {
286 return NUMERIC_UTILS_LONG_PARSER;
289 public String toString() {
290 return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER";
295 * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
296 * via {@link NumericField}/{@link NumericTokenStream}.
298 public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
299 public double parseDouble(String val) {
300 final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
301 if (shift>0 && shift<=63)
302 throw new FieldCacheImpl.StopFillCacheException();
303 return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
305 protected Object readResolve() {
306 return NUMERIC_UTILS_DOUBLE_PARSER;
309 public String toString() {
310 return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER";
314 /** Checks the internal cache for an appropriate entry, and if none is found,
315 * reads the terms in <code>field</code> and returns a bit set of size
316 * <code>reader.maxDoc()</code>, with a bit turned on for each docid that
317 * has a value for this field.
319 public Bits getDocsWithField(IndexReader reader, String field)
322 /** Checks the internal cache for an appropriate entry, and if none is
323 * found, reads the terms in <code>field</code> as a single byte and returns an array
324 * of size <code>reader.maxDoc()</code> of the value each document
325 * has in the given field.
326 * @param reader Used to get field values.
327 * @param field Which field contains the single byte values.
328 * @return The values in the given field for each document.
329 * @throws IOException If any error occurs.
331 public byte[] getBytes (IndexReader reader, String field)
334 /** Checks the internal cache for an appropriate entry, and if none is found,
335 * reads the terms in <code>field</code> as bytes and returns an array of
336 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
338 * @param reader Used to get field values.
339 * @param field Which field contains the bytes.
340 * @param parser Computes byte for string values.
341 * @return The values in the given field for each document.
342 * @throws IOException If any error occurs.
344 public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
347 /** Checks the internal cache for an appropriate entry, and if none is found,
348 * reads the terms in <code>field</code> as bytes and returns an array of
349 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
351 * @param reader Used to get field values.
352 * @param field Which field contains the bytes.
353 * @param parser Computes byte for string values.
354 * @param setDocsWithField If true then {@link #getDocsWithField} will
355 * also be computed and stored in the FieldCache.
356 * @return The values in the given field for each document.
357 * @throws IOException If any error occurs.
359 public byte[] getBytes (IndexReader reader, String field, ByteParser parser, boolean setDocsWithField)
362 /** Checks the internal cache for an appropriate entry, and if none is
363 * found, reads the terms in <code>field</code> as shorts and returns an array
364 * of size <code>reader.maxDoc()</code> of the value each document
365 * has in the given field.
366 * @param reader Used to get field values.
367 * @param field Which field contains the shorts.
368 * @return The values in the given field for each document.
369 * @throws IOException If any error occurs.
371 public short[] getShorts (IndexReader reader, String field)
374 /** Checks the internal cache for an appropriate entry, and if none is found,
375 * reads the terms in <code>field</code> as shorts and returns an array of
376 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
378 * @param reader Used to get field values.
379 * @param field Which field contains the shorts.
380 * @param parser Computes short for string values.
381 * @return The values in the given field for each document.
382 * @throws IOException If any error occurs.
384 public short[] getShorts (IndexReader reader, String field, ShortParser parser)
387 /** Checks the internal cache for an appropriate entry, and if none is found,
388 * reads the terms in <code>field</code> as shorts and returns an array of
389 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
391 * @param reader Used to get field values.
392 * @param field Which field contains the shorts.
393 * @param parser Computes short for string values.
394 * @param setDocsWithField If true then {@link #getDocsWithField} will
395 * also be computed and stored in the FieldCache.
396 * @return The values in the given field for each document.
397 * @throws IOException If any error occurs.
399 public short[] getShorts (IndexReader reader, String field, ShortParser parser, boolean setDocsWithField)
402 /** Checks the internal cache for an appropriate entry, and if none is
403 * found, reads the terms in <code>field</code> as integers and returns an array
404 * of size <code>reader.maxDoc()</code> of the value each document
405 * has in the given field.
406 * @param reader Used to get field values.
407 * @param field Which field contains the integers.
408 * @return The values in the given field for each document.
409 * @throws IOException If any error occurs.
411 public int[] getInts (IndexReader reader, String field)
414 /** Checks the internal cache for an appropriate entry, and if none is found,
415 * reads the terms in <code>field</code> as integers and returns an array of
416 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
418 * @param reader Used to get field values.
419 * @param field Which field contains the integers.
420 * @param parser Computes integer for string values.
421 * @return The values in the given field for each document.
422 * @throws IOException If any error occurs.
424 public int[] getInts (IndexReader reader, String field, IntParser parser)
427 /** Checks the internal cache for an appropriate entry, and if none is found,
428 * reads the terms in <code>field</code> as integers and returns an array of
429 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
431 * @param reader Used to get field values.
432 * @param field Which field contains the integers.
433 * @param parser Computes integer for string values.
434 * @param setDocsWithField If true then {@link #getDocsWithField} will
435 * also be computed and stored in the FieldCache.
436 * @return The values in the given field for each document.
437 * @throws IOException If any error occurs.
439 public int[] getInts (IndexReader reader, String field, IntParser parser, boolean setDocsWithField)
442 /** Checks the internal cache for an appropriate entry, and if
443 * none is found, reads the terms in <code>field</code> as floats and returns an array
444 * of size <code>reader.maxDoc()</code> of the value each document
445 * has in the given field.
446 * @param reader Used to get field values.
447 * @param field Which field contains the floats.
448 * @return The values in the given field for each document.
449 * @throws IOException If any error occurs.
451 public float[] getFloats (IndexReader reader, String field)
454 /** Checks the internal cache for an appropriate entry, and if
455 * none is found, reads the terms in <code>field</code> as floats and returns an array
456 * of size <code>reader.maxDoc()</code> of the value each document
457 * has in the given field.
458 * @param reader Used to get field values.
459 * @param field Which field contains the floats.
460 * @param parser Computes float for string values.
461 * @return The values in the given field for each document.
462 * @throws IOException If any error occurs.
464 public float[] getFloats (IndexReader reader, String field,
465 FloatParser parser) throws IOException;
467 /** Checks the internal cache for an appropriate entry, and if
468 * none is found, reads the terms in <code>field</code> as floats and returns an array
469 * of size <code>reader.maxDoc()</code> of the value each document
470 * has in the given field.
471 * @param reader Used to get field values.
472 * @param field Which field contains the floats.
473 * @param parser Computes float for string values.
474 * @param setDocsWithField If true then {@link #getDocsWithField} will
475 * also be computed and stored in the FieldCache.
476 * @return The values in the given field for each document.
477 * @throws IOException If any error occurs.
479 public float[] getFloats (IndexReader reader, String field,
480 FloatParser parser, boolean setDocsWithField) throws IOException;
483 * Checks the internal cache for an appropriate entry, and if none is
484 * found, reads the terms in <code>field</code> as longs and returns an array
485 * of size <code>reader.maxDoc()</code> of the value each document
486 * has in the given field.
488 * @param reader Used to get field values.
489 * @param field Which field contains the longs.
490 * @return The values in the given field for each document.
491 * @throws java.io.IOException If any error occurs.
493 public long[] getLongs(IndexReader reader, String field)
497 * Checks the internal cache for an appropriate entry, and if none is found,
498 * reads the terms in <code>field</code> as longs and returns an array of
499 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
502 * @param reader Used to get field values.
503 * @param field Which field contains the longs.
504 * @param parser Computes long for string values.
505 * @return The values in the given field for each document.
506 * @throws IOException If any error occurs.
508 public long[] getLongs(IndexReader reader, String field, LongParser parser)
511 * Checks the internal cache for an appropriate entry, and if none is found,
512 * reads the terms in <code>field</code> as longs and returns an array of
513 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
516 * @param reader Used to get field values.
517 * @param field Which field contains the longs.
518 * @param parser Computes long for string values.
519 * @param setDocsWithField If true then {@link #getDocsWithField} will
520 * also be computed and stored in the FieldCache.
521 * @return The values in the given field for each document.
522 * @throws IOException If any error occurs.
524 public long[] getLongs(IndexReader reader, String field, LongParser parser, boolean setDocsWithField)
528 * Checks the internal cache for an appropriate entry, and if none is
529 * found, reads the terms in <code>field</code> as doubles and returns an array
530 * of size <code>reader.maxDoc()</code> of the value each document
531 * has in the given field.
533 * @param reader Used to get field values.
534 * @param field Which field contains the doubles.
535 * @return The values in the given field for each document.
536 * @throws IOException If any error occurs.
538 public double[] getDoubles(IndexReader reader, String field)
542 * Checks the internal cache for an appropriate entry, and if none is found,
543 * reads the terms in <code>field</code> as doubles and returns an array of
544 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
547 * @param reader Used to get field values.
548 * @param field Which field contains the doubles.
549 * @param parser Computes double for string values.
550 * @return The values in the given field for each document.
551 * @throws IOException If any error occurs.
553 public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
557 * Checks the internal cache for an appropriate entry, and if none is found,
558 * reads the terms in <code>field</code> as doubles and returns an array of
559 * size <code>reader.maxDoc()</code> of the value each document has in the given field.
562 * @param reader Used to get field values.
563 * @param field Which field contains the doubles.
564 * @param parser Computes double for string values.
565 * @param setDocsWithField If true then {@link #getDocsWithField} will
566 * also be computed and stored in the FieldCache.
567 * @return The values in the given field for each document.
568 * @throws IOException If any error occurs.
570 public double[] getDoubles(IndexReader reader, String field, DoubleParser parser, boolean setDocsWithField)
573 /** Checks the internal cache for an appropriate entry, and if none
574 * is found, reads the term values in <code>field</code> and returns an array
575 * of size <code>reader.maxDoc()</code> containing the value each document
576 * has in the given field.
577 * @param reader Used to get field values.
578 * @param field Which field contains the strings.
579 * @return The values in the given field for each document.
580 * @throws IOException If any error occurs.
582 public String[] getStrings (IndexReader reader, String field)
585 /** Checks the internal cache for an appropriate entry, and if none
586 * is found reads the term values in <code>field</code> and returns
587 * an array of them in natural order, along with an array telling
588 * which element in the term array each document uses.
589 * @param reader Used to get field values.
590 * @param field Which field contains the strings.
591 * @return Array of terms and index into the array for each document.
592 * @throws IOException If any error occurs.
594 public StringIndex getStringIndex (IndexReader reader, String field)
598 * EXPERT: A unique Identifier/Description for each item in the FieldCache.
599 * Can be useful for logging/debugging.
600 * @lucene.experimental
602 public static abstract class CacheEntry {
603 public abstract Object getReaderKey();
604 public abstract String getFieldName();
605 public abstract Class<?> getCacheType();
606 public abstract Object getCustom();
607 public abstract Object getValue();
608 private String size = null;
609 protected final void setEstimatedSize(String size) {
613 * @see #estimateSize(RamUsageEstimator)
615 public void estimateSize() {
616 estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
619 * Computes (and stores) the estimated size of the cache Value
620 * @see #getEstimatedSize
622 public void estimateSize(RamUsageEstimator ramCalc) {
623 long size = ramCalc.estimateRamUsage(getValue());
624 setEstimatedSize(RamUsageEstimator.humanReadableUnits
625 (size, new DecimalFormat("0.#")));
629 * The most recently estimated size of the value, null unless
630 * estimateSize has been called.
632 public final String getEstimatedSize() {
638 public String toString() {
639 StringBuilder b = new StringBuilder();
640 b.append("'").append(getReaderKey()).append("'=>");
641 b.append("'").append(getFieldName()).append("',");
642 b.append(getCacheType()).append(",").append(getCustom());
643 b.append("=>").append(getValue().getClass().getName()).append("#");
644 b.append(System.identityHashCode(getValue()));
646 String s = getEstimatedSize();
648 b.append(" (size =~ ").append(s).append(')');
657 * EXPERT: Generates an array of CacheEntry objects representing all items
658 * currently in the FieldCache.
660 * NOTE: These CacheEntry objects maintain a strong reference to the
661 * Cached Values. Maintaining references to a CacheEntry after the IndexReader
662 * associated with it has been garbage collected will prevent the Value itself
663 * from being garbage collected when the Cache drops the WeakReference.
665 * @lucene.experimental
667 public abstract CacheEntry[] getCacheEntries();
671 * EXPERT: Instructs the FieldCache to forcibly expunge all entries
672 * from the underlying caches. This is intended only to be used for
673 * test methods as a way to ensure a known base state of the Cache
674 * (without needing to rely on GC to free WeakReferences).
675 * It should not be relied on for "Cache maintenance" in general
678 * @lucene.experimental
680 public abstract void purgeAllCaches();
683 * Expert: drops all cache entries associated with this
684 * reader. NOTE: this reader must precisely match the
685 * reader that the cache entry is keyed on. If you pass a
686 * top-level reader, it usually will have no effect as
687 * Lucene now caches at the segment reader level.
689 public abstract void purge(IndexReader r);
692 * If non-null, FieldCacheImpl will warn whenever
693 * entries are created that are not sane according to
694 * {@link org.apache.lucene.util.FieldCacheSanityChecker}.
696 public void setInfoStream(PrintStream stream);
698 /** counterpart of {@link #setInfoStream(PrintStream)} */
699 public PrintStream getInfoStream();