X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java?ds=inline diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java new file mode 100644 index 0000000..6dfb2b9 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java @@ -0,0 +1,329 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexWriter; // javadocs +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.store.RAMDirectory; // javadocs +import org.apache.lucene.util.IOUtils; + +// TODO +// - let subclass dictate policy...? 
+// - rename to MergeCacheingDir? NRTCachingDir + +/** + * Wraps a {@link RAMDirectory} + * around any provided delegate directory, to + * be used during NRT search. Make sure you pull the merge + * scheduler using {@link #getMergeScheduler} and pass that to your + * {@link IndexWriter}; this class uses that to keep track of which + * merges are being done by which threads, to decide when to + * cache each written file. + * + *
This class is likely only useful in a near-real-time + * context, where indexing rate is lowish but reopen + * rate is highish, resulting in many tiny files being + * written. This directory keeps such segments (as well as + * the segments produced by merging them, as long as they + * are small enough), in RAM.
+ * + *This is safe to use: when your app calls {@link IndexWriter#commit}, + * all cached files will be flushed from the cache and sync'd.
+ * + *NOTE: this class is somewhat sneaky in its + * approach for spying on merges to determine the size of a + * merge: it records which threads are running which merges + * by watching ConcurrentMergeScheduler's doMerge method. + * While this works correctly, future versions of + * this class will likely take a more general approach. + * + *
Here's a simple example usage: + * + *
+ * Directory fsDir = FSDirectory.open(new File("/path/to/index")); + * NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0); + * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer); + * conf.setMergeScheduler(cachedFSDir.getMergeScheduler()); + * IndexWriter writer = new IndexWriter(cachedFSDir, conf); + *+ * + *
This will cache all newly flushed segments and all merges + * whose expected segment size is <= 5 MB, unless the net + * cached bytes exceed 60 MB, at which point no writes will + * be cached (until the net bytes fall below 60 MB).
+ * + * @lucene.experimental + */ + +public class NRTCachingDirectory extends Directory { + + private final RAMDirectory cache = new RAMDirectory(); + + private final Directory delegate; + + private final long maxMergeSizeBytes; + private final long maxCachedBytes; + + private static final boolean VERBOSE = false; + + /** + * We will cache a newly created output if 1) it's a + * flush or a merge and the estimated size of the merged segment is <= + * maxMergeSizeMB, and 2) the total cached bytes is <= + * maxCachedMB */ + public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) { + this.delegate = delegate; + maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024); + maxCachedBytes = (long) (maxCachedMB*1024*1024); + } + + @Override + public LockFactory getLockFactory() { + return delegate.getLockFactory(); + } + + @Override + public void setLockFactory(LockFactory lf) throws IOException { + delegate.setLockFactory(lf); + } + + @Override + public String getLockID() { + return delegate.getLockID(); + } + + @Override + public Lock makeLock(String name) { + return delegate.makeLock(name); + } + + @Override + public void clearLock(String name) throws IOException { + delegate.clearLock(name); + } + + @Override + public String toString() { + return "NRTCachingDirectory(" + delegate + "; maxCacheMB=" + (maxCachedBytes/1024/1024.) + " maxMergeSizeMB=" + (maxMergeSizeBytes/1024/1024.) + ")"; + } + + @Override + public synchronized String[] listAll() throws IOException { + final Set