1 package org.apache.lucene.facet.search;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.cache.CategoryListCache;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
 * facet indexing params.
 *
 * @lucene.experimental
 */
public final class TotalFacetCountsCache {

  /**
   * Default size of in memory cache for computed total facet counts.
   * Set to 2 for the case when an application reopened a reader and
   * the original one is still in use (Otherwise there will be
   * switching again and again between the two.)
   */
  public static final int DEFAULT_CACHE_SIZE = 2;

  // Eagerly-created singleton; obtained through getSingleton().
  private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();

  /**
   * Get the single instance of this cache
   */
  // NOTE(review): method body (presumably "return singleton;") is not visible
  // in this excerpt — verify against the full file.
  public static TotalFacetCountsCache getSingleton() {
  /**
   * In-memory cache of TFCs.
   * <ul>
   * <li>It's size is kept within limits through {@link #trimCache()}.
   * <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}.
   * <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
   * </ul>
   * @see #markRecentlyUsed(TFCKey)
   */
  private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();

  /**
   * A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
   * @see #markRecentlyUsed(TFCKey)
   */
  private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();

  // Upper bound on the number of cached TFCs; adjustable via setCacheSize(int).
  private int maxCacheSize = DEFAULT_CACHE_SIZE;
  /** Private constructor for the singleton pattern; use {@link #getSingleton()}. */
  private TotalFacetCountsCache() {
  /**
   * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
   * If not in cache, computed here and added to the cache for later use.
   * @param indexReader the documents index
   * @param taxonomy the taxonomy index
   * @param facetIndexingParams facet indexing parameters
   * @param clCache category list cache for faster computation, can be null
   * @return the total facet counts.
   */
  public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
    // it is important that this call is not synchronized, so that available TFC
    // would not wait for one that needs to be computed.
    TotalFacetCounts tfc = cache.get(key);
    // NOTE(review): the cache-hit branch (presumably "if (tfc != null) { ... return tfc; }")
    // is not visible in this excerpt; on a hit the key's LRU position is refreshed here.
    markRecentlyUsed(key);
    // Cache miss: compute under synchronization; computeAndCache() double-checks
    // the cache so the same TFC is not computed twice.
    return computeAndCache(key, clCache);
  /**
   * Mark key as it as recently used.
   * <p>
   * <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
   * <ul>
   * <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
   * But unfortunately, Java does not provide one.
   * Instead, we combine two concurrent objects:
   * <ul>
   * <li>{@link ConcurrentHashMap} for the cached TFCs.
   * <li>{@link ConcurrentLinkedQueue} for active keys
   * </ul>
   * <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
   * <li>Checks for a cached item through getTotalCounts() are not synchronized.
   * Therefore, the case that a needed TFC is in the cache is very fast:
   * it does not wait for the computation of other TFCs.
   * <li>computeAndCache() is synchronized, and, has a (double) check of the required
   * TFC, to avoid computing the same TFC twice.
   * <li>A race condition in this method (markRecentlyUsed) might result in two copies
   * of the same 'key' in lruKeys, but this is handled by the loop in trimCache(),
   * where an attempt to remove the same key twice is a no-op.
   * </ul>
   */
  // NOTE(review): body not visible in this excerpt; per the javadoc above it
  // re-appends 'key' to lruKeys (tolerating the duplicate-key race).
  private void markRecentlyUsed(TFCKey key) {
  /** Evict least-recently-used entries until the cache size is within {@link #maxCacheSize}. */
  private synchronized void trimCache() {
    // loop until cache is of desired size.
    while (cache.size()>maxCacheSize ) {
      TFCKey key = lruKeys.poll();
      if (key==null) { //defensive
        // it is defensive since lruKeys presumably covers the cache keys
        key = cache.keys().nextElement();
      // remove this element. Note that an attempt to remove with the same key again is a no-op,
      // which gracefully handles the possible race in markRecentlyUsed().
      // NOTE(review): the actual removal statements (presumably cache.remove(key)
      // and lruKeys.remove(key)) and closing braces are not visible in this excerpt.
  /**
   * compute TFC and cache it, after verifying it was not just added - for this
   * matter this method is synchronized, which is not too bad, because there is
   * lots of work done in the computations.
   */
  private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
    // Double-check under the lock: another thread may have computed and cached
    // this TFC between the unsynchronized cache.get() in getTotalCounts() and here.
    TotalFacetCounts tfc = cache.get(key);
    // NOTE(review): the guard (presumably "if (tfc == null)") and the subsequent
    // caching/return of the computed result are not visible in this excerpt.
    tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
164 * Load {@link TotalFacetCounts} matching input parameters from the provided outputFile
165 * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
166 * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
167 * replaced by the loaded one.
168 * @param inputFile file from which to read the data
169 * @param indexReader the documents index
170 * @param taxonomy the taxonomy index
171 * @param facetIndexingParams the facet indexing parameters
172 * @throws IOException on error
173 * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
175 public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
176 FacetIndexingParams facetIndexingParams) throws IOException {
177 if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
178 throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
180 TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
181 TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
184 markRecentlyUsed(key);
188 * Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
189 * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
190 * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
191 * not in the cache, this call will first compute them (which will also add them to the cache).
192 * @param outputFile file to store in.
193 * @param indexReader the documents index
194 * @param taxonomy the taxonomy index
195 * @param facetIndexingParams the facet indexing parameters
196 * @param clCache category list cache for faster computation, can be null
197 * @throws IOException on error
198 * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
199 * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
201 public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
202 FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
203 File parentFile = outputFile.getParentFile();
205 ( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
206 (!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
208 throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
210 TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
211 TotalFacetCounts.storeToFile(outputFile, tfc);
214 private static class TFCKey {
215 final IndexReader indexReader;
216 final TaxonomyReader taxonomy;
217 private final Iterable<CategoryListParams> clps;
218 private final int hashCode;
219 private final int nDels; // needed when a reader used for faceted search was just used for deletion.
220 final FacetIndexingParams facetIndexingParams;
222 public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
223 FacetIndexingParams facetIndexingParams) {
224 this.indexReader = indexReader;
225 this.taxonomy = taxonomy;
226 this.facetIndexingParams = facetIndexingParams;
227 this.clps = facetIndexingParams.getAllCategoryListParams();
228 this.nDels = indexReader.numDeletedDocs();
229 hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
233 public int hashCode() {
238 public boolean equals(Object other) {
239 TFCKey o = (TFCKey) other;
240 if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
243 Iterator<CategoryListParams> it1 = clps.iterator();
244 Iterator<CategoryListParams> it2 = o.clps.iterator();
245 while (it1.hasNext() && it2.hasNext()) {
246 if (!it1.next().equals(it2.next())) {
250 return it1.hasNext() == it2.hasNext();
  // NOTE(review): body not visible in this excerpt; presumably empties both
  // the cache and lruKeys — verify against the full file.
  public synchronized void clear() {

  /**
   * @return the maximal cache size
   */
  public int getCacheSize() {

  /**
   * Set the number of TotalFacetCounts arrays that will remain in memory cache.
   * <p>
   * If new size is smaller than current size, the cache is appropriately trimmed.
   * <p>
   * Minimal size is 1, so passing zero or negative size would result in size of 1.
   * @param size new size to set
   */
  public void setCacheSize(int size) {
    if (size < 1) size = 1; // enforce the documented minimum of 1
    int origSize = maxCacheSize;
    // NOTE(review): the assignment "maxCacheSize = size;" is not visible in this
    // excerpt but is implied by the comparison below.
    if (maxCacheSize < origSize) { // need to trim only if the cache was reduced