package org.apache.lucene.facet.taxonomy.writercache.lru;

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * LRU {@link TaxonomyWriterCache} - good choice for huge taxonomies.
 *
 * @lucene.experimental
 */
public class LruTaxonomyWriterCache implements TaxonomyWriterCache {

  /**
   * Selects the backing cache implementation: {@code LRU_HASHED} keys entries
   * on a hash of the category path (less RAM, small collision risk), while
   * {@code LRU_STRING} keys them on the full path (guaranteed correctness).
   */
  public enum LRUType { LRU_HASHED, LRU_STRING }

  private NameIntCacheLRU cache;
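
  /** Creates a cache of the given size, defaulting to {@link LRUType#LRU_HASHED}. */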
  public LruTaxonomyWriterCache(int cacheSize) {
    // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU;
    // see the tradeoff discussion in the two-argument constructor below.
    this(cacheSize, LRUType.LRU_HASHED);
  }
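
  /** Creates a cache of the given size, backed by the given {@link LRUType}. */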
  public LruTaxonomyWriterCache(int cacheSize, LRUType lruType) {
    // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
    // For guaranteed correctness - not relying on the hash function being
    // collision-free - NameIntCacheLRU should be used. On the other hand,
    // NameHashIntCacheLRU takes less RAM, but if there are collisions
    // (which we have never observed) two different paths would be mapped
    // to the same ordinal...
    if (lruType == LRUType.LRU_HASHED) {
      this.cache = new NameHashIntCacheLRU(cacheSize);
    } else {
      this.cache = new NameIntCacheLRU(cacheSize);
    }
  }
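
  /** Returns true if the cache has room for {@code n} more entries. */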
  public boolean hasRoom(int n) {
    return n <= (cache.getMaxSize() - cache.getSize());
  }

  /** Clears the cache and drops the reference to it. */
  public void close() {
    cache.clear();
    cache = null;
  }
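
  /** Returns the ordinal cached for the given path, or -1 if it is not in the cache. */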
  public int get(CategoryPath categoryPath) {
    Integer res = cache.get(categoryPath);
    if (res == null) {
      return -1;
    }
    return res.intValue();
  }
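
  /**
   * Returns the ordinal cached for the prefix of the given path consisting of
   * its first {@code length} components, or -1 if it is not in the cache.
   */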
  public int get(CategoryPath categoryPath, int length) {
    if (length < 0 || length > categoryPath.length()) {
      length = categoryPath.length();
    }
    // TODO (Facet): unfortunately, we make a copy here! We could avoid part
    // of the copy by creating a wrapper object (though that still allocates).
    // A better implementation of the cache would not use Java's hash table,
    // but rather a hash table we control, so that the length parameter could
    // be passed directly into it...
    Integer res = cache.get(new CategoryPath(categoryPath, length));
    if (res == null) {
      return -1;
    }
    return res.intValue();
  }
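
  /**
   * Caches the given path-to-ordinal mapping. Returns true if the cache was
   * full, in which case the oldest entries are evicted in bulk.
   */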
  public boolean put(CategoryPath categoryPath, int ordinal) {
    boolean ret = cache.put(categoryPath, new Integer(ordinal));
    // If the cache is full, we need to clear one or more old entries.
    // However, if we evict a recent addition that isn't yet in our reader,
    // then for that entry to become visible to us again the changes must be
    // committed and the reader reopened. Because this is a slow operation,
    // we don't evict entries one by one but rather in bulk (put() removes
    // the oldest two-thirds of the entries).
    if (ret) {
      cache.makeRoomLRU();
    }
    return ret;
  }
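
  /**
   * Like {@link #put(CategoryPath, int)}, but caches the mapping for only the
   * first {@code prefixLen} components of the given path.
   */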
  public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
    boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
    // See the eviction note in put(CategoryPath, int) above: evictions are
    // done in bulk, not one entry at a time.
    if (ret) {
      cache.makeRoomLRU();
    }
    return ret;
  }

}
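
// A minimal usage sketch (illustrative only; in real code the ordinal comes
// from the taxonomy writer, which is handed a cache like this one):
//
//   TaxonomyWriterCache cache =
//       new LruTaxonomyWriterCache(4096, LruTaxonomyWriterCache.LRUType.LRU_STRING);
//   CategoryPath cp = new CategoryPath("Author", "Mark Twain");
//   cache.put(cp, 42);        // remember the ordinal assigned to this path
//   int ord = cache.get(cp);  // 42, or -1 once the entry has been evicted
//   cache.close();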