lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.Serializable;
  21 import java.io.IOException;
  22 import java.util.Map;
  23 import java.util.WeakHashMap;
  24
  25 import org.apache.lucene.index.IndexReader;
  26 import org.apache.lucene.util.FixedBitSet;
  27
  28 /**
  29  * Wraps another filter's result and caches it.  The purpose is to allow
  30  * filters to simply filter, and then wrap with this class
  31  * to add caching.
  32  *
  33  * <p><b>NOTE</b>: if you wrap this filter as a query (eg,
  34  * using ConstantScoreQuery), you'll likely want to enforce
  35  * deletions (using either {@link DeletesMode#RECACHE} or
  36  * {@link DeletesMode#DYNAMIC}).
  37  */
  38 public class CachingWrapperFilter extends Filter {
  39   Filter filter;
  40
  41   /**
  42    * Expert: Specifies how new deletions against a reopened
  43    * reader should be handled.
  44    *
  45    * <p>The default is IGNORE, which means the cache entry
  46    * will be re-used for a given segment, even when that
  47    * segment has been reopened due to changes in deletions.
  48    * This is a big performance gain, especially with
  49    * near-real-timer readers, since you don't hit a cache
  50    * miss on every reopened reader for prior segments.</p>
  51    *
  52    * <p>However, in some cases this can cause invalid query
  53    * results, allowing deleted documents to be returned.
  54    * This only happens if the main query does not rule out
  55    * deleted documents on its own, such as a toplevel
  56    * ConstantScoreQuery.  To fix this, use RECACHE to
  57    * re-create the cached filter (at a higher per-reopen
  58    * cost, but at faster subsequent search performance), or
  59    * use DYNAMIC to dynamically intersect deleted docs (fast
  60    * reopen time but some hit to search performance).</p>
  61    */
  62   public static enum DeletesMode {IGNORE, RECACHE, DYNAMIC};
  63
  64   protected final FilterCache<DocIdSet> cache;
  65
  66   static abstract class FilterCache<T> implements Serializable {
  67
  68     /**
  69      * A transient Filter cache (package private because of test)
  70      */
  71     // NOTE: not final so that we can dynamically re-init
  72     // after de-serialize
  73     transient Map<Object,T> cache;
  74
  75     private final DeletesMode deletesMode;
  76
  77     public FilterCache(DeletesMode deletesMode) {
  78       this.deletesMode = deletesMode;
  79     }
  80
  81     public synchronized T get(IndexReader reader, Object coreKey, Object delCoreKey) throws IOException {
  82       T value;
  83
  84       if (cache == null) {
  85         cache = new WeakHashMap<Object,T>();
  86       }
  87
  88       if (deletesMode == DeletesMode.IGNORE) {
  89         // key on core
  90         value = cache.get(coreKey);
  91       } else if (deletesMode == DeletesMode.RECACHE) {
  92         // key on deletes, if any, else core
  93         value = cache.get(delCoreKey);
  94       } else {
  95         assert deletesMode == DeletesMode.DYNAMIC;
  96
  97         // first try for exact match
  98         value = cache.get(delCoreKey);
  99
 100         if (value == null) {
 101           // now for core match, but dynamically AND NOT
 102           // deletions
 103           value = cache.get(coreKey);
 104           if (value != null && reader.hasDeletions()) {
 105             value = mergeDeletes(reader, value);
 106           }
 107         }
 108       }
 109
 110       return value;
 111     }
 112
 113     protected abstract T mergeDeletes(IndexReader reader, T value);
 114
 115     public synchronized void put(Object coreKey, Object delCoreKey, T value) {
 116       if (deletesMode == DeletesMode.IGNORE) {
 117         cache.put(coreKey, value);
 118       } else if (deletesMode == DeletesMode.RECACHE) {
 119         cache.put(delCoreKey, value);
 120       } else {
 121         cache.put(coreKey, value);
 122         cache.put(delCoreKey, value);
 123       }
 124     }
 125   }
 126
 127   /**
 128    * New deletes are ignored by default, which gives higher
 129    * cache hit rate on reopened readers.  Most of the time
 130    * this is safe, because the filter will be AND'd with a
 131    * Query that fully enforces deletions.  If instead you
 132    * need this filter to always enforce deletions, pass
 133    * either {@link DeletesMode#RECACHE} or {@link
 134    * DeletesMode#DYNAMIC}.
 135    * @param filter Filter to cache results of
 136    */
 137   public CachingWrapperFilter(Filter filter) {
 138     this(filter, DeletesMode.IGNORE);
 139   }
 140
 141   /**
 142    * Expert: by default, the cached filter will be shared
 143    * across reopened segments that only had changes to their
 144    * deletions.
 145    *
 146    * @param filter Filter to cache results of
 147    * @param deletesMode See {@link DeletesMode}
 148    */
 149   public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) {
 150     this.filter = filter;
 151     cache = new FilterCache<DocIdSet>(deletesMode) {
 152       @Override
 153       public DocIdSet mergeDeletes(final IndexReader r, final DocIdSet docIdSet) {
 154         return new FilteredDocIdSet(docIdSet) {
 155           @Override
 156           protected boolean match(int docID) {
 157             return !r.isDeleted(docID);
 158           }
 159         };
 160       }
 161     };
 162   }
 163
 164   /** Provide the DocIdSet to be cached, using the DocIdSet provided
 165    *  by the wrapped Filter.
 166    *  <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
 167    *  returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
 168    *  an {@link FixedBitSet}.
 169    */
 170   protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
 171     if (docIdSet == null) {
 172       // this is better than returning null, as the nonnull result can be cached
 173       return DocIdSet.EMPTY_DOCIDSET;
 174     } else if (docIdSet.isCacheable()) {
 175       return docIdSet;
 176     } else {
 177       final DocIdSetIterator it = docIdSet.iterator();
 178       // null is allowed to be returned by iterator(),
 179       // in this case we wrap with the empty set,
 180       // which is cacheable.
 181       if (it == null) {
 182         return DocIdSet.EMPTY_DOCIDSET;
 183       } else {
 184         final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
 185         bits.or(it);
 186         return bits;
 187       }
 188     }
 189   }
 190
 191   // for testing
 192   int hitCount, missCount;
 193
 194   @Override
 195   public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
 196
 197     final Object coreKey = reader.getCoreCacheKey();
 198     final Object delCoreKey = reader.hasDeletions() ? reader.getDeletesCacheKey() : coreKey;
 199
 200     DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
 201     if (docIdSet != null) {
 202       hitCount++;
 203       return docIdSet;
 204     }
 205
 206     missCount++;
 207
 208     // cache miss
 209     docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);
 210
 211     if (docIdSet != null) {
 212       cache.put(coreKey, delCoreKey, docIdSet);
 213     }
 214
 215     return docIdSet;
 216   }
 217
 218   @Override
 219   public String toString() {
 220     return "CachingWrapperFilter("+filter+")";
 221   }
 222
 223   @Override
 224   public boolean equals(Object o) {
 225     if (!(o instanceof CachingWrapperFilter)) return false;
 226     return this.filter.equals(((CachingWrapperFilter)o).filter);
 227   }
 228
 229   @Override
 230   public int hashCode() {
 231     return filter.hashCode() ^ 0x1117BF25;
 232   }
 233 }