X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java?ds=sidebyside
diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
new file mode 100644
index 0000000..23b9562
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
@@ -0,0 +1,285 @@
+package org.apache.lucene.facet.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
+ * facet indexing params.
+ *
+ * @lucene.experimental
+ */
+public final class TotalFacetCountsCache {
+
+ /**
+ * Default size of the in-memory cache for computed total facet counts.
+ * Set to 2 to cover the case where an application reopens a reader while
+ * the original one is still in use; otherwise the cache would keep
+ * switching back and forth between the two.
+ */
+ public static final int DEFAULT_CACHE_SIZE = 2;
+
+ private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
+
+ /**
+ * Get the single instance of this cache
+ */
+ public static TotalFacetCountsCache getSingleton() {
+ return singleton;
+ }
+
+ /**
+ * In-memory cache of TFCs.
+ *
+ * - Its size is kept within limits through {@link #trimCache()}.
+ *
+ * - An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
+ *
+ * - After each addition to the cache, trimCache is called, to remove the entries least recently used.
+ *
+ * @see #markRecentlyUsed(TFCKey)
+ */
+ private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
+
+ /**
+ * A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
+ * @see #markRecentlyUsed(TFCKey)
+ */
+ private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
+
+ private int maxCacheSize = DEFAULT_CACHE_SIZE;
+
+ /** private constructor for singleton pattern */
+ private TotalFacetCountsCache() {
+ }
+
+ /**
+ * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
+ * If the counts are not already in the cache, they are computed here and added to the cache for later use.
+ * @param indexReader the documents index
+ * @param taxonomy the taxonomy index
+ * @param facetIndexingParams facet indexing parameters
+ * @param clCache category list cache for faster computation, can be null
+ * @return the total facet counts.
+ */
+ public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
+ FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+ // create the key
+ TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+ // it is important that this lookup is not synchronized, so that an already
+ // available TFC does not wait for one that still needs to be computed.
+ TotalFacetCounts tfc = cache.get(key);
+ if (tfc != null) {
+ markRecentlyUsed(key);
+ return tfc;
+ }
+ return computeAndCache(key, clCache);
+ }
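+
+ /*
+ * Illustrative usage sketch (not part of the original source): how an application
+ * might obtain total facet counts through the singleton cache. The index reader,
+ * taxonomy reader and facet indexing params are assumed to be opened/created by the
+ * caller; passing null for the CategoryListCache simply skips that optimization.
+ *
+ *   TotalFacetCountsCache tfcCache = TotalFacetCountsCache.getSingleton();
+ *   TotalFacetCounts counts = tfcCache.getTotalCounts(
+ *       indexReader, taxonomyReader, facetIndexingParams, null);
+ *   // a second call with the same reader/taxonomy/params returns the cached
+ *   // instance instead of recomputing it.
+ */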
+
+ /**
+ * Mark key as recently used.
+ *
+ * Implementation notes: synchronization considerations and the interaction between lruKeys and cache:
+ *
+ * - A concurrent {@link LinkedHashMap} would have made this class much simpler,
+ * but unfortunately Java does not provide one.
+ * Instead, we combine two concurrent objects:
+ *
+ * - {@link ConcurrentHashMap} for the cached TFCs.
+ *
+ * - {@link ConcurrentLinkedQueue} for active keys.
+ *
+ * - Both {@link #lruKeys} and {@link #cache} are concurrently safe.
+ *
+ * - Checks for a cached item through getTotalCounts() are not synchronized.
+ * Therefore, the common case where a needed TFC is already in the cache is very fast:
+ * it does not wait for the computation of other TFCs.
+ *
+ * - computeAndCache() is synchronized, and has a (double) check of the required
+ * TFC, to avoid computing the same TFC twice.
+ *
+ * - A race condition in this method (markRecentlyUsed) might result in two copies
+ * of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
+ * where an attempt to remove the same key twice is a no-op.
+ */
+ private void markRecentlyUsed(TFCKey key) {
+ lruKeys.remove(key);
+ lruKeys.add(key);
+ }
+
+ private synchronized void trimCache() {
+ // loop until cache is of desired size.
+ while (cache.size() > maxCacheSize) {
+ TFCKey key = lruKeys.poll();
+ if (key == null) { // defensive
+ // it is defensive since lruKeys presumably covers the cache keys
+ key = cache.keys().nextElement();
+ }
+ // remove this element. Note that an attempt to remove with the same key again is a no-op,
+ // which gracefully handles the possible race in markRecentlyUsed().
+ cache.remove(key);
+ }
+ }
+
+ /**
+ * Compute the TFC and cache it, after verifying it was not just added. For this
+ * reason the method is synchronized, which is not too costly because most of
+ * the work is done in the computation itself.
+ */
+ private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
+ TotalFacetCounts tfc = cache.get(key);
+ if (tfc == null) {
+ tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
+ lruKeys.add(key);
+ cache.put(key,tfc);
+ trimCache();
+ }
+ return tfc;
+ }
+
+ /**
+ * Load {@link TotalFacetCounts} matching the input parameters from the provided inputFile
+ * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
+ * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
+ * replaced by the loaded one.
+ * @param inputFile file from which to read the data
+ * @param indexReader the documents index
+ * @param taxonomy the taxonomy index
+ * @param facetIndexingParams the facet indexing parameters
+ * @throws IOException on error
+ * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+ */
+ public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+ FacetIndexingParams facetIndexingParams) throws IOException {
+ if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
+ throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
+ }
+ TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+ TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
+ cache.put(key,tfc);
+ trimCache();
+ markRecentlyUsed(key);
+ }
+
+ /**
+ * Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
+ * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
+ * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
+ * not in the cache, this call will first compute them (which will also add them to the cache).
+ * @param outputFile file to store in.
+ * @param indexReader the documents index
+ * @param taxonomy the taxonomy index
+ * @param facetIndexingParams the facet indexing parameters
+ * @param clCache category list cache for faster computation, can be null
+ * @throws IOException on error
+ * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
+ * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+ */
+ public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+ FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+ File parentFile = outputFile.getParentFile();
+ if (
+ ( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
+ (!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
+ ) {
+ throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
+ }
+ TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
+ TotalFacetCounts.storeToFile(outputFile, tfc);
+ }
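+
+ /*
+ * Illustrative persistence sketch (not part of the original source): storing computed
+ * counts to disk and loading them back later (e.g. after a restart) so the expensive
+ * computation is not repeated. The file path is hypothetical; the reader, taxonomy
+ * and params are assumed to be the caller's.
+ *
+ *   TotalFacetCountsCache tfcCache = TotalFacetCountsCache.getSingleton();
+ *   File tfcFile = new File("/some/path/tfc.dat");
+ *   tfcCache.store(tfcFile, indexReader, taxonomyReader, facetIndexingParams, null);
+ *   // ... later, possibly in another process ...
+ *   tfcCache.load(tfcFile, indexReader, taxonomyReader, facetIndexingParams);
+ */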
+
+ private static class TFCKey {
+ final IndexReader indexReader;
+ final TaxonomyReader taxonomy;
+ private final Iterable<CategoryListParams> clps;
+ private final int hashCode;
+ private final int nDels; // needed when a reader used for faceted search was just used for deletion.
+ final FacetIndexingParams facetIndexingParams;
+
+ public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
+ FacetIndexingParams facetIndexingParams) {
+ this.indexReader = indexReader;
+ this.taxonomy = taxonomy;
+ this.facetIndexingParams = facetIndexingParams;
+ this.clps = facetIndexingParams.getAllCategoryListParams();
+ this.nDels = indexReader.numDeletedDocs();
+ hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
+ }
+
+ @Override
+ public int hashCode() {
+ return hashCode;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ TFCKey o = (TFCKey) other;
+ if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
+ return false;
+ }
+ Iterator<CategoryListParams> it1 = clps.iterator();
+ Iterator<CategoryListParams> it2 = o.clps.iterator();
+ while (it1.hasNext() && it2.hasNext()) {
+ if (!it1.next().equals(it2.next())) {
+ return false;
+ }
+ }
+ return it1.hasNext() == it2.hasNext();
+ }
+ }
+
+ /**
+ * Clear the cache.
+ */
+ public synchronized void clear() {
+ cache.clear();
+ lruKeys.clear();
+ }
+
+ /**
+ * @return the maximal cache size
+ */
+ public int getCacheSize() {
+ return maxCacheSize;
+ }
+
+ /**
+ * Set the number of TotalFacetCounts arrays that will remain in the in-memory cache.
+ *
+ * If the new size is smaller than the current size, the cache is trimmed accordingly.
+ *
+ * The minimal size is 1, so passing zero or a negative size results in a size of 1.
+ * @param size new size to set
+ */
+ public void setCacheSize(int size) {
+ if (size < 1) size = 1;
+ int origSize = maxCacheSize;
+ maxCacheSize = size;
+ if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
+ trimCache();
+ }
+ }
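+
+ /*
+ * Illustrative sizing sketch (not part of the original source): an application that
+ * never keeps two readers open at once could shrink the cache to a single entry.
+ * Sizes below 1 are clamped to 1, and shrinking trims the cache immediately.
+ *
+ *   TotalFacetCountsCache.getSingleton().setCacheSize(1);
+ */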
+}