+++ /dev/null
-package org.apache.lucene.store;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.lucene.index.ConcurrentMergeScheduler;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexWriter; // javadocs
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergeScheduler;
-import org.apache.lucene.store.RAMDirectory; // javadocs
-import org.apache.lucene.util.IOUtils;
-
-// TODO
-// - let subclass dictate policy...?
-// - rename to MergeCacheingDir? NRTCachingDir
-
-/**
- * Wraps a {@link RAMDirectory}
- * around any provided delegate directory, to
- * be used during NRT search. Make sure you pull the merge
- * scheduler using {@link #getMergeScheduler} and pass that to your
- * {@link IndexWriter}; this class uses that to keep track of which
- * merges are being done by which threads, to decide when to
- * cache each written file.
- *
- * <p>This class is likely only useful in a near-real-time
- * context, where indexing rate is lowish but reopen
- * rate is highish, resulting in many tiny files being
- * written. This directory keeps such segments (as well as
- * the segments produced by merging them, as long as they
- * are small enough), in RAM.</p>
- *
- * <p>This is safe to use: when your app calls {@link IndexWriter#commit},
- * all cached files will be flushed from the cache and sync'd.</p>
- *
- * <p><b>NOTE</b>: this class is somewhat sneaky in its
- * approach for spying on merges to determine the size of a
- * merge: it records which threads are running which merges
- * by watching ConcurrentMergeScheduler's doMerge method.
- * While this works correctly, likely future versions of
- * this class will take a more general approach.
- *
- * <p>Here's a simple example usage:
- *
- * <pre>
- * Directory fsDir = FSDirectory.open(new File("/path/to/index"));
- * NRTCachingDirectory cachedFSDir = new NRTCachingDirectory(fsDir, 5.0, 60.0);
- * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_32, analyzer);
- * conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
- * IndexWriter writer = new IndexWriter(cachedFSDir, conf);
- * </pre>
- *
- * <p>This will cache all newly flushed segments and all merges
- * whose expected segment size is &lt;= 5 MB, unless the net
- * cached bytes exceed 60 MB, at which point no writes will
- * be cached (until the net bytes fall below 60 MB).</p>
- *
- * @lucene.experimental
- */
-
-public class NRTCachingDirectory extends Directory {
-
-  /** In-memory directory holding files that have not yet been copied to the delegate. */
-  private final RAMDirectory cache = new RAMDirectory();
-
-  /** The wrapped directory; locking is delegated to it and uncached files end up here. */
-  private final Directory delegate;
-
-  /** Merges whose estimated size exceeds this many bytes are written straight to the delegate. */
-  private final long maxMergeSizeBytes;
-
-  /** Once the cache holds more than this many bytes, new outputs go straight to the delegate. */
-  private final long maxCachedBytes;
-
-  private static final boolean VERBOSE = false;
-
-  /**
-   * Held for the whole of {@code unCache} so that only one thread at a time
-   * moves a file out of the cache. Lock order is always
-   * {@code uncacheLock}, then {@code this}.
-   */
-  private final Object uncacheLock = new Object();
-
-  /**
-   * Merge currently being run by each merge thread, recorded by the scheduler
-   * returned from {@link #getMergeScheduler} and consulted by
-   * {@link #doCacheWrite}.
-   */
-  private final ConcurrentHashMap<Thread,MergePolicy.OneMerge> merges = new ConcurrentHashMap<Thread,MergePolicy.OneMerge>();
-
-  /**
-   * We will cache a newly created output if 1) it's a
-   * flush or a merge and the estimated size of the merged segment is &lt;=
-   * maxMergeSizeMB, and 2) the total cached bytes is &lt;=
-   * maxCachedMB
-   *
-   * @param delegate directory that receives files once they are uncached
-   * @param maxMergeSizeMB largest estimated merge size, in MB, that is still cached
-   * @param maxCachedMB cache size, in MB, above which new outputs are no longer cached
-   */
-  public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
-    this.delegate = delegate;
-    maxMergeSizeBytes = (long) (maxMergeSizeMB*1024*1024);
-    maxCachedBytes = (long) (maxCachedMB*1024*1024);
-  }
-
-  /** Returns the delegate's lock factory. */
-  @Override
-  public LockFactory getLockFactory() {
-    return delegate.getLockFactory();
-  }
-
-  /** Sets the lock factory on the delegate. */
-  @Override
-  public void setLockFactory(LockFactory lf) throws IOException {
-    delegate.setLockFactory(lf);
-  }
-
-  /** Returns the delegate's lock ID. */
-  @Override
-  public String getLockID() {
-    return delegate.getLockID();
-  }
-
-  /** Creates the named lock through the delegate. */
-  @Override
-  public Lock makeLock(String name) {
-    return delegate.makeLock(name);
-  }
-
-  /** Clears the named lock through the delegate. */
-  @Override
-  public void clearLock(String name) throws IOException {
-    delegate.clearLock(name);
-  }
-
-  @Override
-  public String toString() {
-    // Divide in floating point throughout: the former bytes/1024/1024. form
-    // truncated to whole KB before the floating-point division.
-    return "NRTCachingDirectory(" + delegate + "; maxCacheMB=" + (maxCachedBytes/1024./1024.) + " maxMergeSizeMB=" + (maxMergeSizeBytes/1024./1024.) + ")";
-  }
-
-  /**
-   * Returns the union of the file names in the cache and in the delegate.
-   *
-   * @throws NoSuchDirectoryException if the delegate reports that it does not
-   *         exist and the cache holds no files either
-   */
-  @Override
-  public synchronized String[] listAll() throws IOException {
-    final Set<String> files = new HashSet<String>();
-    for(String f : cache.listAll()) {
-      files.add(f);
-    }
-    // LUCENE-1468: our NRTCachingDirectory will actually exist (RAMDir!),
-    // but if the underlying delegate is an FSDir and mkdirs() has not
-    // yet been called, because so far everything is a cached write,
-    // in this case, we don't want to throw a NoSuchDirectoryException
-    try {
-      for(String f : delegate.listAll()) {
-        // A set is used, rather than asserting the name is not already
-        // present, so a name found in both places is reported once.
-        files.add(f);
-      }
-    } catch (NoSuchDirectoryException ex) {
-      // however, if there are no cached files, then the directory truly
-      // does not "exist"
-      if (files.isEmpty()) {
-        throw ex;
-      }
-    }
-    return files.toArray(new String[files.size()]);
-  }
-
-  /** Returns how many bytes are being used by the
-   *  RAMDirectory cache */
-  public long sizeInBytes() {
-    return cache.sizeInBytes();
-  }
-
-  /** Returns true if the file is present in the cache or in the delegate. */
-  @Override
-  public synchronized boolean fileExists(String name) throws IOException {
-    return cache.fileExists(name) || delegate.fileExists(name);
-  }
-
-  /** Returns the modification time from the cache if the file is cached, else from the delegate. */
-  @Override
-  public synchronized long fileModified(String name) throws IOException {
-    if (cache.fileExists(name)) {
-      return cache.fileModified(name);
-    } else {
-      return delegate.fileModified(name);
-    }
-  }
-
-  /**
-   * Touches the file in the cache if it is cached, else in the delegate.
-   *
-   * @deprecated Lucene never uses this API; it will be
-   * removed in 4.0.
-   */
-  @Override
-  @Deprecated
-  public synchronized void touchFile(String name) throws IOException {
-    if (cache.fileExists(name)) {
-      cache.touchFile(name);
-    } else {
-      delegate.touchFile(name);
-    }
-  }
-
-  /** Deletes the file from the cache if it is cached, else from the delegate. */
-  @Override
-  public synchronized void deleteFile(String name) throws IOException {
-    if (VERBOSE) {
-      System.out.println("nrtdir.deleteFile name=" + name);
-    }
-    if (cache.fileExists(name)) {
-      assert !delegate.fileExists(name): "name=" + name;
-      cache.deleteFile(name);
-    } else {
-      delegate.deleteFile(name);
-    }
-  }
-
-  /** Returns the length from the cache if the file is cached, else from the delegate. */
-  @Override
-  public synchronized long fileLength(String name) throws IOException {
-    if (cache.fileExists(name)) {
-      return cache.fileLength(name);
-    } else {
-      return delegate.fileLength(name);
-    }
-  }
-
-  /** Returns the names of the files currently held in the RAMDirectory cache. */
-  public String[] listCachedFiles() {
-    return cache.listAll();
-  }
-
-  /**
-   * Creates a new output in the cache when {@link #doCacheWrite} allows it,
-   * otherwise in the delegate. Any file with the same name in the
-   * <i>other</i> directory is deleted first, so a name never lives in both
-   * places; otherwise a cached file would shadow a stale delegate copy and
-   * would later be skipped when uncaching.
-   */
-  @Override
-  public IndexOutput createOutput(String name) throws IOException {
-    if (VERBOSE) {
-      System.out.println("nrtdir.createOutput name=" + name);
-    }
-    // Ask the (overridable) policy before taking our own lock.
-    if (doCacheWrite(name)) {
-      if (VERBOSE) {
-        System.out.println(" to cache");
-      }
-      synchronized(this) {
-        if (delegate.fileExists(name)) {
-          delegate.deleteFile(name);
-        }
-        return cache.createOutput(name);
-      }
-    } else {
-      synchronized(this) {
-        if (cache.fileExists(name)) {
-          cache.deleteFile(name);
-        }
-        return delegate.createOutput(name);
-      }
-    }
-  }
-
-  /**
-   * Moves each named file that is still cached into the delegate, then asks
-   * the delegate to sync all of the names.
-   */
-  @Override
-  public void sync(Collection<String> fileNames) throws IOException {
-    if (VERBOSE) {
-      System.out.println("nrtdir.sync files=" + fileNames);
-    }
-    for(String fileName : fileNames) {
-      unCache(fileName);
-    }
-    delegate.sync(fileNames);
-  }
-
-  /** Opens the file from the cache if it is cached, else from the delegate. */
-  @Override
-  public synchronized IndexInput openInput(String name) throws IOException {
-    if (VERBOSE) {
-      System.out.println("nrtdir.openInput name=" + name);
-    }
-    if (cache.fileExists(name)) {
-      if (VERBOSE) {
-        System.out.println(" from cache");
-      }
-      return cache.openInput(name);
-    } else {
-      return delegate.openInput(name);
-    }
-  }
-
-  /** Opens the file with the given buffer size from the cache if it is cached, else from the delegate. */
-  @Override
-  public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
-    if (cache.fileExists(name)) {
-      return cache.openInput(name, bufferSize);
-    } else {
-      return delegate.openInput(name, bufferSize);
-    }
-  }
-
-  /** Close this directory, which flushes any cached files
-   *  to the delegate and then closes the delegate. */
-  @Override
-  public void close() throws IOException {
-    // If flushing a file fails we deliberately do not close the cache, so
-    // the cached bytes are not discarded.
-    for(String fileName : cache.listAll()) {
-      unCache(fileName);
-    }
-    cache.close();
-    delegate.close();
-  }
-
-  /**
-   * Returns a {@link ConcurrentMergeScheduler} that records, for the duration
-   * of each merge, which thread is running it, so that
-   * {@link #doCacheWrite} can look up the merge behind a write.
-   */
-  public MergeScheduler getMergeScheduler() {
-    return new ConcurrentMergeScheduler() {
-      @Override
-      protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
-        try {
-          merges.put(Thread.currentThread(), merge);
-          super.doMerge(merge);
-        } finally {
-          merges.remove(Thread.currentThread());
-        }
-      }
-    };
-  }
-
-  /** Subclass can override this to customize logic; return
-   *  true if this file should be written to the RAMDirectory.
-   *  The default caches every file except the segments.gen file, provided
-   *  that the calling thread is not running a merge whose estimated size
-   *  exceeds the merge limit and the cache is not above its size limit. */
-  protected boolean doCacheWrite(String name) {
-    final MergePolicy.OneMerge merge = merges.get(Thread.currentThread());
-    //System.out.println(Thread.currentThread().getName() + ": CACHE check merge=" + merge + " size=" + (merge==null ? 0 : merge.estimatedMergeBytes));
-    return !name.equals(IndexFileNames.SEGMENTS_GEN) && (merge == null || merge.estimatedMergeBytes <= maxMergeSizeBytes) && cache.sizeInBytes() <= maxCachedBytes;
-  }
-
-  /**
-   * Copies the named file from the cache to the delegate and then removes it
-   * from the cache. Does nothing if the file is not cached.
-   *
-   * @throws IOException if the file is cached but a file of the same name
-   *         also exists in the delegate, or if the copy fails
-   */
-  private void unCache(String fileName) throws IOException {
-    // Only let one thread uncache at a time, so two threads can never copy
-    // the same file concurrently.
-    synchronized(uncacheLock) {
-      final IndexOutput out;
-      synchronized(this) {
-        if (!cache.fileExists(fileName)) {
-          // Nothing to move: the file was written directly to the delegate,
-          // or another thread already uncached it. Checking the cache first
-          // also avoids creating an empty file in the delegate for a name
-          // that exists nowhere.
-          return;
-        }
-        if (delegate.fileExists(fileName)) {
-          // Silently skipping here would leave the cached bytes to be
-          // discarded on close(), so fail loudly instead.
-          throw new IOException("cannot uncache file=\"" + fileName + "\": it was separately also created in the delegate directory");
-        }
-        out = delegate.createOutput(fileName);
-      }
-
-      // The copy runs without holding "this" so readers are not blocked.
-      IndexInput in = null;
-      try {
-        in = cache.openInput(fileName);
-        in.copyBytes(out, in.length());
-      } finally {
-        IOUtils.close(in, out);
-      }
-
-      // Must sync here because the other synchronized methods do
-      // if (cache.fileExists(name)) { ... } else { ... }
-      synchronized(this) {
-        cache.deleteFile(fileName);
-      }
-    }
-  }
-}
-