X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java b/lucene-java-3.4.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java deleted file mode 100644 index d8838d5..0000000 --- a/lucene-java-3.4.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java +++ /dev/null @@ -1,563 +0,0 @@ -package org.apache.lucene.facet.taxonomy.lucene; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; - -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.util.collections.LRUHashMap; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * LuceneTaxonomyReader is a {@link TaxonomyReader} which retrieves stored - * taxonomy information from a separate Lucene index. By using a Lucene index, - * rather than some specialized file format, we get for "free" its correctness - * (especially regarding concurrency), and the ability to save it on any - * implementation of Directory (and not just the file system). - *
- * Reading from the on-disk index on every method call is too slow, so this
- * implementation employs caching: Some methods cache recent requests and
- * their results, while other methods prefetch all the data into memory
- * and then provide answers directly from in-memory tables. See the
- * documentation of individual methods for comments on their performance.
- *
- * @lucene.experimental
- */
-public class LuceneTaxonomyReader implements TaxonomyReader {
-
- private static final Logger logger = Logger.getLogger(LuceneTaxonomyReader.class.getName());
-
- private IndexReader indexReader;
-
- // The following lock is used to allow multiple threads to read from the
- // index concurrently, while having them block during the very short
- // critical moment of refresh() (see comments below). Note, however, that
- // we only read from the index when we don't have the entry in our cache,
- // and the caches are locked separately.
- private ReadWriteLock indexReaderLock = new ReentrantReadWriteLock();
-
- // The following are the limited-size LRU caches used to cache the latest
- // results from getOrdinal() and getCategoryCache().
- // Because LRUHashMap is not thread-safe, we need to synchronize on this
- // object when using it. Unfortunately, this is not optimal under heavy
- // contention because it means that while one thread is using the cache
- // (reading or modifying) others are blocked from using it - or even
- // starting to do benign things like calculating the hash function. A more
- // efficient approach would be to use a non-locking (as much as possible)
- // concurrent solution, along the lines of java.util.concurrent.ConcurrentHashMap
- // but with LRU semantics.
- // However, even in the current sub-optimal implementation we do not make
- // the mistake of locking out readers while waiting for disk in a cache
- // miss - below, we do not hold cache lock while reading missing data from
- // disk.
- private final LRUHashMap
- * Currently, if the given size is smaller than the current size of
- * a cache, it will not shrink, and rather we be limited to its current
- * size.
- * @param size the new maximum cache size, in number of entries.
- */
- public void setCacheSize(int size) {
- synchronized(getCategoryCache) {
- getCategoryCache.setMaxSize(size);
- }
- synchronized(getOrdinalCache) {
- getOrdinalCache.setMaxSize(size);
- }
- }
-
- /**
- * setDelimiter changes the character that the taxonomy uses in its
- * internal storage as a delimiter between category components. Do not
- * use this method unless you really know what you are doing.
- *
- * If you do use this method, make sure you call it before any other
- * methods that actually queries the taxonomy. Moreover, make sure you
- * always pass the same delimiter for all LuceneTaxonomyWriter and
- * LuceneTaxonomyReader objects you create.
- */
- public void setDelimiter(char delimiter) {
- this.delimiter = delimiter;
- }
-
- public int getOrdinal(CategoryPath categoryPath) throws IOException {
- if (categoryPath.length()==0) {
- return ROOT_ORDINAL;
- }
- String path = categoryPath.toString(delimiter);
-
- // First try to find the answer in the LRU cache:
- synchronized(getOrdinalCache) {
- Integer res = getOrdinalCache.get(path);
- if (res!=null) {
- return res.intValue();
- }
- }
-
- // If we're still here, we have a cache miss. We need to fetch the
- // value from disk, and then also put it in the cache:
- int ret = TaxonomyReader.INVALID_ORDINAL;
- try {
- indexReaderLock.readLock().lock();
- TermDocs docs = indexReader.termDocs(new Term(Consts.FULL, path));
- if (docs.next()) {
- ret = docs.doc();
- }
- } finally {
- indexReaderLock.readLock().unlock();
- }
-
- // Put the new value in the cache. Note that it is possible that while
- // we were doing the above fetching (without the cache locked), some
- // other thread already added the same category to the cache. We do
- // not care about this possibilty, as LRUCache replaces previous values
- // of the same keys (it doesn't store duplicates).
- synchronized(getOrdinalCache) {
- // GB: new Integer(int); creates a new object each and every time.
- // Integer.valueOf(int) might not (See JavaDoc).
- getOrdinalCache.put(path, Integer.valueOf(ret));
- }
-
- return ret;
- }
-
- public CategoryPath getPath(int ordinal) throws CorruptIndexException, IOException {
- // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
- // strings with delimiters, not CategoryPath objects, so even if
- // we have a cache hit, we need to process the string and build a new
- // CategoryPath object every time. What is preventing us from putting
- // the actual CategoryPath object in the cache is the fact that these
- // objects are mutable. So we should create an immutable (read-only)
- // interface that CategoryPath implements, and this method should
- // return this interface, not the writable CategoryPath.
- String label = getLabel(ordinal);
- if (label==null) {
- return null;
- }
- return new CategoryPath(label, delimiter);
- }
-
- public boolean getPath(int ordinal, CategoryPath result) throws CorruptIndexException, IOException {
- String label = getLabel(ordinal);
- if (label==null) {
- return false;
- }
- result.clear();
- result.add(label, delimiter);
- return true;
- }
-
- private String getLabel(int catID) throws CorruptIndexException, IOException {
- // First try to find the answer in the LRU cache. It is very
- // unfortunate that we need to allocate an Integer object here -
- // it would have been better if we used a hash table specifically
- // designed for int keys...
- // GB: new Integer(int); creates a new object each and every time.
- // Integer.valueOf(int) might not (See JavaDoc).
- Integer catIDInteger = Integer.valueOf(catID);
-
- synchronized(getCategoryCache) {
- String res = getCategoryCache.get(catIDInteger);
- if (res!=null) {
- return res;
- }
- }
-
- // If we're still here, we have a cache miss. We need to fetch the
- // value from disk, and then also put it in the cache:
- String ret;
- try {
- indexReaderLock.readLock().lock();
- // The taxonomy API dictates that if we get an invalid category
- // ID, we should return null, If we don't check this here, we
- // can some sort of an exception from the document() call below.
- // NOTE: Currently, we *do not* cache this return value; There
- // isn't much point to do so, because checking the validity of
- // the docid doesn't require disk access - just comparing with
- // the number indexReader.maxDoc().
- if (catID<0 || catID>=indexReader.maxDoc()) {
- return null;
- }
- ret = indexReader.document(catID, Consts.fullPathSelector)
- .get(Consts.FULL);
- } finally {
- indexReaderLock.readLock().unlock();
- }
- // Put the new value in the cache. Note that it is possible that while
- // we were doing the above fetching (without the cache locked), some
- // other thread already added the same category to the cache. We do
- // not care about this possibility, as LRUCache replaces previous
- // values of the same keys (it doesn't store duplicates).
- synchronized (getCategoryCache) {
- getCategoryCache.put(catIDInteger, ret);
- }
-
- return ret;
- }
-
- public int getParent(int ordinal) {
- // Note how we don't need to hold the read lock to do the following,
- // because the array reference is volatile, ensuring the correct
- // visibility and ordering: if we get the new reference, the new
- // data is also visible to this thread.
- return getParentArray()[ordinal];
- }
-
- /**
- * getParentArray() returns an int array of size getSize() listing the
- * ordinal of the parent category of each category in the taxonomy.
- *
- * The caller can hold on to the array it got indefinitely - it is
- * guaranteed that no-one else will modify it. The other side of the
- * same coin is that the caller must treat the array it got as read-only
- * and not modify it, because other callers might have gotten the
- * same array too, and getParent() calls are also answered from the
- * same array.
- *
- * The getParentArray() call is extremely efficient, merely returning
- * a reference to an array that already exists. For a caller that plans
- * to call getParent() for many categories, using getParentArray() and
- * the array it returns is a somewhat faster approach because it avoids
- * the overhead of method calls and volatile dereferencing.
- *
- * If you use getParentArray() instead of getParent(), remember that
- * the array you got is (naturally) not modified after a refresh(),
- * so you should always call getParentArray() again after a refresh().
- */
-
- public int[] getParentArray() {
- // Note how we don't need to hold the read lock to do the following,
- // because the array reference is volatile, ensuring the correct
- // visibility and ordering: if we get the new reference, the new
- // data is also visible to this thread.
- return parentArray.getArray();
- }
-
- // Note that refresh() is synchronized (it is the only synchronized
- // method in this class) to ensure that it never gets called concurrently
- // with itself.
- public synchronized void refresh() throws IOException {
- /*
- * Since refresh() can be a lengthy operation, it is very important that we
- * avoid locking out all readers for its duration. This is why we don't hold
- * the indexReaderLock write lock for the entire duration of this method. In
- * fact, it is enough to hold it only during a single assignment! Other
- * comments in this method will explain this.
- */
-
- // note that the lengthy operation indexReader.reopen() does not
- // modify the reader, so we can do it without holding a lock. We can
- // safely read indexReader without holding the write lock, because
- // no other thread can be writing at this time (this method is the
- // only possible writer, and it is "synchronized" to avoid this case).
- IndexReader r2 = indexReader.reopen();
- if (indexReader != r2) {
- IndexReader oldreader = indexReader;
- // we can close the old searcher, but need to synchronize this
- // so that we don't close it in the middle that another routine
- // is reading from it.
- indexReaderLock.writeLock().lock();
- indexReader = r2;
- indexReaderLock.writeLock().unlock();
- // We can close the old reader, but need to be certain that we
- // don't close it while another method is reading from it.
- // Luckily, we can be certain of that even without putting the
- // oldreader.close() in the locked section. The reason is that
- // after lock() succeeded above, we know that all existing readers
- // had finished (this is what a read-write lock ensures). New
- // readers, starting after the unlock() we just did, already got
- // the new indexReader we set above. So nobody can be possibly
- // using the old indexReader, and we can close it:
- oldreader.close();
-
- // We prefetch some of the arrays to make requests much faster.
- // Let's refresh these prefetched arrays; This refresh is much
- // is made more efficient by assuming that it is enough to read
- // the values for new categories (old categories could not have been
- // changed or deleted)
- // Note that this this done without the write lock being held,
- // which means that it is possible that during a refresh(), a
- // reader will have some methods (like getOrdinal and getCategory)
- // return fresh information, while getParent()
- // (only to be prefetched now) still return older information.
- // We consider this to be acceptable. The important thing,
- // however, is that refreshPrefetchArrays() itself writes to
- // the arrays in a correct manner (see discussion there)
- parentArray.refresh(indexReader);
-
- // Remove any INVALID_ORDINAL values from the ordinal cache,
- // because it is possible those are now answered by the new data!
- Iterator