lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/FieldCache.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.index.IndexReader;
  21 import org.apache.lucene.util.NumericUtils;
  22 import org.apache.lucene.util.RamUsageEstimator;
  23 import org.apache.lucene.document.NumericField; // for javadocs
  24 import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
  25
  26 import java.io.IOException;
  27 import java.io.Serializable;
  28 import java.io.PrintStream;
  29
  30 import java.text.DecimalFormat;
  31
  32 /**
  33  * Expert: Maintains caches of term values.
  34  *
  35  * <p>Created: May 19, 2004 11:13:14 AM
  36  *
  37  * @since   lucene 1.4
  38  * @see org.apache.lucene.util.FieldCacheSanityChecker
  39  */
  40 public interface FieldCache {
  41
  42   public static final class CreationPlaceholder {
  43     Object value;
  44   }
  45
  46   /** Indicator for StringIndex values in the cache. */
  47   // NOTE: the value assigned to this constant must not be
  48   // the same as any of those in SortField!!
  49   public static final int STRING_INDEX = -1;
  50
  51
  52   /** Expert: Stores term text values and document ordering data. */
  53   public static class StringIndex {
  54
  55     public int binarySearchLookup(String key) {
  56       // this special case is the reason that Arrays.binarySearch() isn't useful.
  57       if (key == null)
  58         return 0;
  59
  60       int low = 1;
  61       int high = lookup.length-1;
  62
  63       while (low <= high) {
  64         int mid = (low + high) >>> 1;
  65         int cmp = lookup[mid].compareTo(key);
  66
  67         if (cmp < 0)
  68           low = mid + 1;
  69         else if (cmp > 0)
  70           high = mid - 1;
  71         else
  72           return mid; // key found
  73       }
  74       return -(low + 1);  // key not found.
  75     }
  76
  77     /** All the term values, in natural order. */
  78     public final String[] lookup;
  79
  80     /** For each document, an index into the lookup array. */
  81     public final int[] order;
  82
  83     /** Creates one of these objects */
  84     public StringIndex (int[] values, String[] lookup) {
  85       this.order = values;
  86       this.lookup = lookup;
  87     }
  88   }
  89
  /**
   * Marker super-interface of all parsers; it declares no methods of its
   * own. It is used to specify a custom parser to {@link
   * SortField#SortField(String, FieldCache.Parser)}.
   */
  public interface Parser extends Serializable {
    // intentionally empty: marker interface
  }
  97
  98   /** Interface to parse bytes from document fields.
  99    * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
 100    */
 101   public interface ByteParser extends Parser {
 102     /** Return a single Byte representation of this field's value. */
 103     public byte parseByte(String string);
 104   }
 105
 106   /** Interface to parse shorts from document fields.
 107    * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
 108    */
 109   public interface ShortParser extends Parser {
 110     /** Return a short representation of this field's value. */
 111     public short parseShort(String string);
 112   }
 113
 114   /** Interface to parse ints from document fields.
 115    * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
 116    */
 117   public interface IntParser extends Parser {
 118     /** Return an integer representation of this field's value. */
 119     public int parseInt(String string);
 120   }
 121
 122   /** Interface to parse floats from document fields.
 123    * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser)
 124    */
 125   public interface FloatParser extends Parser {
 126     /** Return an float representation of this field's value. */
 127     public float parseFloat(String string);
 128   }
 129
 130   /** Interface to parse long from document fields.
 131    * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser)
 132    */
 133   public interface LongParser extends Parser {
 134     /** Return an long representation of this field's value. */
 135     public long parseLong(String string);
 136   }
 137
 138   /** Interface to parse doubles from document fields.
 139    * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser)
 140    */
 141   public interface DoubleParser extends Parser {
 142     /** Return an long representation of this field's value. */
 143     public double parseDouble(String string);
 144   }
 145
 146   /** Expert: The cache used internally by sorting and range query classes. */
 147   public static FieldCache DEFAULT = new FieldCacheImpl();
 148
 149   /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
 150   public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
 151     public byte parseByte(String value) {
 152       return Byte.parseByte(value);
 153     }
 154     protected Object readResolve() {
 155       return DEFAULT_BYTE_PARSER;
 156     }
 157     @Override
 158     public String toString() {
 159       return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER";
 160     }
 161   };
 162
 163   /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
 164   public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
 165     public short parseShort(String value) {
 166       return Short.parseShort(value);
 167     }
 168     protected Object readResolve() {
 169       return DEFAULT_SHORT_PARSER;
 170     }
 171     @Override
 172     public String toString() {
 173       return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER";
 174     }
 175   };
 176
 177   /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
 178   public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
 179     public int parseInt(String value) {
 180       return Integer.parseInt(value);
 181     }
 182     protected Object readResolve() {
 183       return DEFAULT_INT_PARSER;
 184     }
 185     @Override
 186     public String toString() {
 187       return FieldCache.class.getName()+".DEFAULT_INT_PARSER";
 188     }
 189   };
 190
 191   /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
 192   public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
 193     public float parseFloat(String value) {
 194       return Float.parseFloat(value);
 195     }
 196     protected Object readResolve() {
 197       return DEFAULT_FLOAT_PARSER;
 198     }
 199     @Override
 200     public String toString() {
 201       return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER";
 202     }
 203   };
 204
 205   /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
 206   public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
 207     public long parseLong(String value) {
 208       return Long.parseLong(value);
 209     }
 210     protected Object readResolve() {
 211       return DEFAULT_LONG_PARSER;
 212     }
 213     @Override
 214     public String toString() {
 215       return FieldCache.class.getName()+".DEFAULT_LONG_PARSER";
 216     }
 217   };
 218
 219   /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
 220   public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
 221     public double parseDouble(String value) {
 222       return Double.parseDouble(value);
 223     }
 224     protected Object readResolve() {
 225       return DEFAULT_DOUBLE_PARSER;
 226     }
 227     @Override
 228     public String toString() {
 229       return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER";
 230     }
 231   };
 232
 233   /**
 234    * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed
 235    * via {@link NumericField}/{@link NumericTokenStream}.
 236    */
 237   public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
 238     public int parseInt(String val) {
 239       final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
 240       if (shift>0 && shift<=31)
 241         throw new FieldCacheImpl.StopFillCacheException();
 242       return NumericUtils.prefixCodedToInt(val);
 243     }
 244     protected Object readResolve() {
 245       return NUMERIC_UTILS_INT_PARSER;
 246     }
 247     @Override
 248     public String toString() {
 249       return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER";
 250     }
 251   };
 252
 253   /**
 254    * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
 255    * via {@link NumericField}/{@link NumericTokenStream}.
 256    */
 257   public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
 258     public float parseFloat(String val) {
 259       final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
 260       if (shift>0 && shift<=31)
 261         throw new FieldCacheImpl.StopFillCacheException();
 262       return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
 263     }
 264     protected Object readResolve() {
 265       return NUMERIC_UTILS_FLOAT_PARSER;
 266     }
 267     @Override
 268     public String toString() {
 269       return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER";
 270     }
 271   };
 272
 273   /**
 274    * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed
 275    * via {@link NumericField}/{@link NumericTokenStream}.
 276    */
 277   public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
 278     public long parseLong(String val) {
 279       final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
 280       if (shift>0 && shift<=63)
 281         throw new FieldCacheImpl.StopFillCacheException();
 282       return NumericUtils.prefixCodedToLong(val);
 283     }
 284     protected Object readResolve() {
 285       return NUMERIC_UTILS_LONG_PARSER;
 286     }
 287     @Override
 288     public String toString() {
 289       return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER";
 290     }
 291   };
 292
 293   /**
 294    * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
 295    * via {@link NumericField}/{@link NumericTokenStream}.
 296    */
 297   public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
 298     public double parseDouble(String val) {
 299       final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
 300       if (shift>0 && shift<=63)
 301         throw new FieldCacheImpl.StopFillCacheException();
 302       return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
 303     }
 304     protected Object readResolve() {
 305       return NUMERIC_UTILS_DOUBLE_PARSER;
 306     }
 307     @Override
 308     public String toString() {
 309       return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER";
 310     }
 311   };
 312
 313   /** Checks the internal cache for an appropriate entry, and if none is found,
 314    * reads the terms in <code>field</code> and returns a bit set at the size of
 315    * <code>reader.maxDoc()</code>, with turned on bits for each docid that
 316    * does not have a value for this field.
 317    */
 318   public DocIdSet getUnValuedDocs (IndexReader reader, String field)
 319   throws IOException;
 320
 321   /** Checks the internal cache for an appropriate entry, and if none is
 322    * found, reads the terms in <code>field</code> as a single byte and returns an array
 323    * of size <code>reader.maxDoc()</code> of the value each document
 324    * has in the given field.
 325    * @param reader  Used to get field values.
 326    * @param field   Which field contains the single byte values.
 327    * @return The values in the given field for each document.
 328    * @throws IOException  If any error occurs.
 329    */
 330   public byte[] getBytes (IndexReader reader, String field)
 331   throws IOException;
 332
 333   /** Checks the internal cache for an appropriate entry, and if none is found,
 334    * reads the terms in <code>field</code> as bytes and returns an array of
 335    * size <code>reader.maxDoc()</code> of the value each document has in the
 336    * given field.
 337    * @param reader  Used to get field values.
 338    * @param field   Which field contains the bytes.
 339    * @param parser  Computes byte for string values.
 340    * @return The values in the given field for each document.
 341    * @throws IOException  If any error occurs.
 342    */
 343   public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
 344   throws IOException;
 345
 346   /** Checks the internal cache for an appropriate entry, and if none is
 347    * found, reads the terms in <code>field</code> as shorts and returns an array
 348    * of size <code>reader.maxDoc()</code> of the value each document
 349    * has in the given field.
 350    * @param reader  Used to get field values.
 351    * @param field   Which field contains the shorts.
 352    * @return The values in the given field for each document.
 353    * @throws IOException  If any error occurs.
 354    */
 355   public short[] getShorts (IndexReader reader, String field)
 356   throws IOException;
 357
 358   /** Checks the internal cache for an appropriate entry, and if none is found,
 359    * reads the terms in <code>field</code> as shorts and returns an array of
 360    * size <code>reader.maxDoc()</code> of the value each document has in the
 361    * given field.
 362    * @param reader  Used to get field values.
 363    * @param field   Which field contains the shorts.
 364    * @param parser  Computes short for string values.
 365    * @return The values in the given field for each document.
 366    * @throws IOException  If any error occurs.
 367    */
 368   public short[] getShorts (IndexReader reader, String field, ShortParser parser)
 369   throws IOException;
 370
 371   /** Checks the internal cache for an appropriate entry, and if none is
 372    * found, reads the terms in <code>field</code> as integers and returns an array
 373    * of size <code>reader.maxDoc()</code> of the value each document
 374    * has in the given field.
 375    * @param reader  Used to get field values.
 376    * @param field   Which field contains the integers.
 377    * @return The values in the given field for each document.
 378    * @throws IOException  If any error occurs.
 379    */
 380   public int[] getInts (IndexReader reader, String field)
 381   throws IOException;
 382
 383   /** Checks the internal cache for an appropriate entry, and if none is found,
 384    * reads the terms in <code>field</code> as integers and returns an array of
 385    * size <code>reader.maxDoc()</code> of the value each document has in the
 386    * given field.
 387    * @param reader  Used to get field values.
 388    * @param field   Which field contains the integers.
 389    * @param parser  Computes integer for string values.
 390    * @return The values in the given field for each document.
 391    * @throws IOException  If any error occurs.
 392    */
 393   public int[] getInts (IndexReader reader, String field, IntParser parser)
 394   throws IOException;
 395
 396   /** Checks the internal cache for an appropriate entry, and if
 397    * none is found, reads the terms in <code>field</code> as floats and returns an array
 398    * of size <code>reader.maxDoc()</code> of the value each document
 399    * has in the given field.
 400    * @param reader  Used to get field values.
 401    * @param field   Which field contains the floats.
 402    * @return The values in the given field for each document.
 403    * @throws IOException  If any error occurs.
 404    */
 405   public float[] getFloats (IndexReader reader, String field)
 406   throws IOException;
 407
 408   /** Checks the internal cache for an appropriate entry, and if
 409    * none is found, reads the terms in <code>field</code> as floats and returns an array
 410    * of size <code>reader.maxDoc()</code> of the value each document
 411    * has in the given field.
 412    * @param reader  Used to get field values.
 413    * @param field   Which field contains the floats.
 414    * @param parser  Computes float for string values.
 415    * @return The values in the given field for each document.
 416    * @throws IOException  If any error occurs.
 417    */
 418   public float[] getFloats (IndexReader reader, String field,
 419                             FloatParser parser) throws IOException;
 420
 421   /**
 422    * Checks the internal cache for an appropriate entry, and if none is
 423    * found, reads the terms in <code>field</code> as longs and returns an array
 424    * of size <code>reader.maxDoc()</code> of the value each document
 425    * has in the given field.
 426    *
 427    * @param reader Used to get field values.
 428    * @param field  Which field contains the longs.
 429    * @return The values in the given field for each document.
 430    * @throws java.io.IOException If any error occurs.
 431    */
 432   public long[] getLongs(IndexReader reader, String field)
 433           throws IOException;
 434
 435   /**
 436    * Checks the internal cache for an appropriate entry, and if none is found,
 437    * reads the terms in <code>field</code> as longs and returns an array of
 438    * size <code>reader.maxDoc()</code> of the value each document has in the
 439    * given field.
 440    *
 441    * @param reader Used to get field values.
 442    * @param field  Which field contains the longs.
 443    * @param parser Computes integer for string values.
 444    * @return The values in the given field for each document.
 445    * @throws IOException If any error occurs.
 446    */
 447   public long[] getLongs(IndexReader reader, String field, LongParser parser)
 448           throws IOException;
 449
 450
 451   /**
 452    * Checks the internal cache for an appropriate entry, and if none is
 453    * found, reads the terms in <code>field</code> as integers and returns an array
 454    * of size <code>reader.maxDoc()</code> of the value each document
 455    * has in the given field.
 456    *
 457    * @param reader Used to get field values.
 458    * @param field  Which field contains the doubles.
 459    * @return The values in the given field for each document.
 460    * @throws IOException If any error occurs.
 461    */
 462   public double[] getDoubles(IndexReader reader, String field)
 463           throws IOException;
 464
 465   /**
 466    * Checks the internal cache for an appropriate entry, and if none is found,
 467    * reads the terms in <code>field</code> as doubles and returns an array of
 468    * size <code>reader.maxDoc()</code> of the value each document has in the
 469    * given field.
 470    *
 471    * @param reader Used to get field values.
 472    * @param field  Which field contains the doubles.
 473    * @param parser Computes integer for string values.
 474    * @return The values in the given field for each document.
 475    * @throws IOException If any error occurs.
 476    */
 477   public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
 478           throws IOException;
 479
 480   /** Checks the internal cache for an appropriate entry, and if none
 481    * is found, reads the term values in <code>field</code> and returns an array
 482    * of size <code>reader.maxDoc()</code> containing the value each document
 483    * has in the given field.
 484    * @param reader  Used to get field values.
 485    * @param field   Which field contains the strings.
 486    * @return The values in the given field for each document.
 487    * @throws IOException  If any error occurs.
 488    */
 489   public String[] getStrings (IndexReader reader, String field)
 490   throws IOException;
 491
 492   /** Checks the internal cache for an appropriate entry, and if none
 493    * is found reads the term values in <code>field</code> and returns
 494    * an array of them in natural order, along with an array telling
 495    * which element in the term array each document uses.
 496    * @param reader  Used to get field values.
 497    * @param field   Which field contains the strings.
 498    * @return Array of terms and index into the array for each document.
 499    * @throws IOException  If any error occurs.
 500    */
 501   public StringIndex getStringIndex (IndexReader reader, String field)
 502   throws IOException;
 503
 504   /**
 505    * EXPERT: A unique Identifier/Description for each item in the FieldCache.
 506    * Can be useful for logging/debugging.
 507    * @lucene.experimental
 508    */
 509   public static abstract class CacheEntry {
 510     public abstract Object getReaderKey();
 511     public abstract String getFieldName();
 512     public abstract Class<?> getCacheType();
 513     public abstract Object getCustom();
 514     public abstract Object getValue();
 515     private String size = null;
 516     protected final void setEstimatedSize(String size) {
 517       this.size = size;
 518     }
 519     /**
 520      * @see #estimateSize(RamUsageEstimator)
 521      */
 522     public void estimateSize() {
 523       estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
 524     }
 525     /**
 526      * Computes (and stores) the estimated size of the cache Value
 527      * @see #getEstimatedSize
 528      */
 529     public void estimateSize(RamUsageEstimator ramCalc) {
 530       long size = ramCalc.estimateRamUsage(getValue());
 531       setEstimatedSize(RamUsageEstimator.humanReadableUnits
 532                        (size, new DecimalFormat("0.#")));
 533
 534     }
 535     /**
 536      * The most recently estimated size of the value, null unless
 537      * estimateSize has been called.
 538      */
 539     public final String getEstimatedSize() {
 540       return size;
 541     }
 542
 543
 544     @Override
 545     public String toString() {
 546       StringBuilder b = new StringBuilder();
 547       b.append("'").append(getReaderKey()).append("'=>");
 548       b.append("'").append(getFieldName()).append("',");
 549       b.append(getCacheType()).append(",").append(getCustom());
 550       b.append("=>").append(getValue().getClass().getName()).append("#");
 551       b.append(System.identityHashCode(getValue()));
 552
 553       String s = getEstimatedSize();
 554       if(null != s) {
 555         b.append(" (size =~ ").append(s).append(')');
 556       }
 557
 558       return b.toString();
 559     }
 560
 561   }
 562
 563   /**
 564    * EXPERT: Generates an array of CacheEntry objects representing all items
 565    * currently in the FieldCache.
 566    * <p>
 567    * NOTE: These CacheEntry objects maintain a strong reference to the
 568    * Cached Values.  Maintaining references to a CacheEntry the IndexReader
 569    * associated with it has garbage collected will prevent the Value itself
 570    * from being garbage collected when the Cache drops the WeakReference.
 571    * </p>
 572    * @lucene.experimental
 573    */
 574   public abstract CacheEntry[] getCacheEntries();
 575
 576   /**
 577    * <p>
 578    * EXPERT: Instructs the FieldCache to forcibly expunge all entries
 579    * from the underlying caches.  This is intended only to be used for
 580    * test methods as a way to ensure a known base state of the Cache
 581    * (with out needing to rely on GC to free WeakReferences).
 582    * It should not be relied on for "Cache maintenance" in general
 583    * application code.
 584    * </p>
 585    * @lucene.experimental
 586    */
 587   public abstract void purgeAllCaches();
 588
 589   /**
 590    * Expert: drops all cache entries associated with this
 591    * reader.  NOTE: this reader must precisely match the
 592    * reader that the cache entry is keyed on. If you pass a
 593    * top-level reader, it usually will have no effect as
 594    * Lucene now caches at the segment reader level.
 595    */
 596   public abstract void purge(IndexReader r);
 597
 598   /**
 599    * If non-null, FieldCacheImpl will warn whenever
 600    * entries are created that are not sane according to
 601    * {@link org.apache.lucene.util.FieldCacheSanityChecker}.
 602    */
 603   public void setInfoStream(PrintStream stream);
 604
 605   /** counterpart of {@link #setInfoStream(PrintStream)} */
 606   public PrintStream getInfoStream();
 607 }