X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java

diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
new file mode 100644
index 0000000..23b9562
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
@@ -0,0 +1,285 @@
+package org.apache.lucene.facet.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
+ * facet indexing params.
+ *
+ * @lucene.experimental
+ */
+public final class TotalFacetCountsCache {
+
+  /**
+   * Default size of the in-memory cache of computed total facet counts.
+   * Set to 2 for the case when an application reopens a reader while the
+   * original one is still in use (otherwise there would be switching back
+   * and forth between the two).
+   */
+  public static final int DEFAULT_CACHE_SIZE = 2;
+
+  private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
+
+  /**
+   * Get the single instance of this cache.
+   */
+  public static TotalFacetCountsCache getSingleton() {
+    return singleton;
+  }
+
+  /**
+   * In-memory cache of TFCs.
+   *
+   * @see #markRecentlyUsed(TFCKey)
+   */
+  private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
+
+  /**
+   * A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
+   * @see #markRecentlyUsed(TFCKey)
+   */
+  private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
+
+  private int maxCacheSize = DEFAULT_CACHE_SIZE;
+
+  /** private constructor for singleton pattern */
+  private TotalFacetCountsCache() {
+  }
+
+  /**
+   * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
+   * If not in the cache, they are computed here and added to the cache for later use.
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams facet indexing parameters
+   * @param clCache category list cache for faster computation, can be null
+   * @return the total facet counts.
+   */
+  public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+    // create the key
+    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+    // It is important that this call is not synchronized, so that an available
+    // TFC does not wait for one that needs to be computed.
+    TotalFacetCounts tfc = cache.get(key);
+    if (tfc != null) {
+      markRecentlyUsed(key);
+      return tfc;
+    }
+    return computeAndCache(key, clCache);
+  }
+
+  /**
+   * Mark key as recently used.
+   * <p>
+   * <b>Implementation notes: synchronization considerations and the interaction
+   * between lruKeys and cache:</b>
+   * <ol>
+   * <li>A concurrent {@link LinkedHashMap} would have made this class much simpler,
+   * but unfortunately Java does not provide one. Instead, we combine two
+   * concurrent objects:
+   * <ul>
+   * <li>{@link ConcurrentHashMap} for the cached TFCs.</li>
+   * <li>{@link ConcurrentLinkedQueue} for the active keys.</li>
+   * </ul>
+   * </li>
+   * <li>Both {@link #lruKeys} and {@link #cache} are thread-safe.</li>
+   * <li>Checks for a cached item through getTotalCounts() are not synchronized.
+   * Therefore, the case where a needed TFC is already in the cache is very fast:
+   * it does not wait for the computation of other TFCs.</li>
+   * <li>computeAndCache() is synchronized and double-checks for the required TFC,
+   * to avoid computing the same TFC twice.</li>
+   * <li>A race condition in this method (markRecentlyUsed) might result in two
+   * copies of the same 'key' in lruKeys, but this is handled by the loop in
+   * trimCache(), where an attempt to remove the same key twice is a no-op.</li>
+   * </ol>
+   */
+  private void markRecentlyUsed(TFCKey key) {
+    lruKeys.remove(key);
+    lruKeys.add(key);
+  }
+
+  private synchronized void trimCache() {
+    // loop until the cache is of the desired size.
+    while (cache.size() > maxCacheSize) {
+      TFCKey key = lruKeys.poll();
+      if (key == null) { // defensive
+        // defensive, since lruKeys presumably covers the cache keys
+        key = cache.keys().nextElement();
+      }
+      // Remove this element. Note that an attempt to remove the same key again
+      // is a no-op, which gracefully handles the possible race in markRecentlyUsed().
+      cache.remove(key);
+    }
+  }
+
+  /**
+   * Compute a TFC and cache it, after verifying it was not just added. For this
+   * purpose the method is synchronized, which is not too costly because a lot of
+   * work is done in the computation itself.
+   */
+  private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
+    TotalFacetCounts tfc = cache.get(key);
+    if (tfc == null) {
+      tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
+      lruKeys.add(key);
+      cache.put(key, tfc);
+      trimCache();
+    }
+    return tfc;
+  }
+
+  /**
+   * Load {@link TotalFacetCounts} matching the input parameters from the provided inputFile
+   * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
+   * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
+   * replaced by the loaded one.
+   * @param inputFile file from which to read the data
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams the facet indexing parameters
+   * @throws IOException on error
+   * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+   */
+  public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams) throws IOException {
+    if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
+      throw new IllegalArgumentException("Expecting an existing readable file: " + inputFile);
+    }
+    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+    TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
+    cache.put(key, tfc);
+    trimCache();
+    markRecentlyUsed(key);
+  }
+
+  /**
+   * Store the {@link TotalFacetCounts} matching the input parameters into the provided outputFile,
+   * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
+   * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
+   * not in the cache, this call will first compute them (which will also add them to the cache).
+   * @param outputFile file to store in.
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams the facet indexing parameters
+   * @param clCache category list cache for faster computation, can be null
+   * @throws IOException on error
+   * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
+   * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+   */
+  public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+    File parentFile = outputFile.getParentFile();
+    if (
+        ( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
+        (!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
+       ) {
+      throw new IllegalArgumentException("Expecting a writable file: " + outputFile);
+    }
+    TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
+    TotalFacetCounts.storeToFile(outputFile, tfc);
+  }
+
+  private static class TFCKey {
+    final IndexReader indexReader;
+    final TaxonomyReader taxonomy;
+    private final Iterable<CategoryListParams> clps;
+    private final int hashCode;
+    private final int nDels; // needed when a reader used for faceted search was just used for deletion.
+    final FacetIndexingParams facetIndexingParams;
+
+    public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
+        FacetIndexingParams facetIndexingParams) {
+      this.indexReader = indexReader;
+      this.taxonomy = taxonomy;
+      this.facetIndexingParams = facetIndexingParams;
+      this.clps = facetIndexingParams.getAllCategoryListParams();
+      this.nDels = indexReader.numDeletedDocs();
+      hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
+    }
+
+    @Override
+    public int hashCode() {
+      return hashCode;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      TFCKey o = (TFCKey) other;
+      if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
+        return false;
+      }
+      Iterator<CategoryListParams> it1 = clps.iterator();
+      Iterator<CategoryListParams> it2 = o.clps.iterator();
+      while (it1.hasNext() && it2.hasNext()) {
+        if (!it1.next().equals(it2.next())) {
+          return false;
+        }
+      }
+      return it1.hasNext() == it2.hasNext();
+    }
+  }
+
+  /**
+   * Clear the cache.
+   */
+  public synchronized void clear() {
+    cache.clear();
+    lruKeys.clear();
+  }
+
+  /**
+   * @return the maximal cache size
+   */
+  public int getCacheSize() {
+    return maxCacheSize;
+  }
+
+  /**
+   * Set the number of {@link TotalFacetCounts} arrays that will remain in the in-memory cache.
+   * <p>
+   * If the new size is smaller than the current size, the cache is trimmed accordingly.
+   * <p>
+   * The minimal size is 1, so passing a zero or negative size results in a size of 1.
+   * @param size new size to set
+   */
+  public void setCacheSize(int size) {
+    if (size < 1) size = 1;
+    int origSize = maxCacheSize;
+    maxCacheSize = size;
+    if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
+      trimCache();
+    }
+  }
+}
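
For context, here is a minimal usage sketch of the class added above (not part of the
diff itself). It assumes a Lucene 3.5 contrib/facet setup with a Directory-based index
and taxonomy; the directory paths, the output file name, and the choice of
DefaultFacetIndexingParams are illustrative.

import java.io.File;

import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.TotalFacetCounts;
import org.apache.lucene.facet.search.TotalFacetCountsCache;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TotalFacetCountsCacheUsage {
  public static void main(String[] args) throws Exception {
    Directory indexDir = FSDirectory.open(new File("index")); // illustrative path
    Directory taxoDir = FSDirectory.open(new File("taxo"));   // illustrative path

    IndexReader reader = IndexReader.open(indexDir);
    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
    FacetIndexingParams params = new DefaultFacetIndexingParams();

    TotalFacetCountsCache tfcCache = TotalFacetCountsCache.getSingleton();

    // The first call computes the counts (inside the synchronized computeAndCache());
    // later calls with the same reader/taxonomy/params are lock-free cache hits.
    TotalFacetCounts tfc = tfcCache.getTotalCounts(reader, taxo, params, null);

    // Optionally persist the counts so a later process can load() them
    // instead of recomputing.
    tfcCache.store(new File("tfc.bin"), reader, taxo, params, null);

    taxo.close();
    reader.close();
  }
}

Note that getTotalCounts() is the only call an application normally needs: the
double-checked computeAndCache() keeps at most one computation per key in flight, and
the ConcurrentLinkedQueue of keys gives an approximate LRU order for eviction once the
cache exceeds its configured size (DEFAULT_CACHE_SIZE = 2).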