X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java new file mode 100644 index 0000000..97a4376 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java @@ -0,0 +1,716 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.util.CollectionUtil; + +/* + * This class keeps track of each SegmentInfos instance that + * is still "live", either because it corresponds to a + * segments_N file in the Directory (a "commit", i.e. a + * committed SegmentInfos) or because it's an in-memory + * SegmentInfos that a writer is actively updating but has + * not yet committed. This class uses simple reference + * counting to map the live SegmentInfos instances to + * individual files in the Directory. + * + * The same directory file may be referenced by more than + * one IndexCommit, i.e. more than one SegmentInfos. + * Therefore we count how many commits reference each file. + * When all the commits referencing a certain file have been + * deleted, the refcount for that file becomes zero, and the + * file is deleted. + * + * A separate deletion policy interface + * (IndexDeletionPolicy) is consulted on creation (onInit) + * and once per commit (onCommit), to decide when a commit + * should be removed. + * + * It is the business of the IndexDeletionPolicy to choose + * when to delete commit points. The actual mechanics of + * file deletion, retrying, etc, derived from the deletion + * of commit points is the business of the IndexFileDeleter. + * + * The current default deletion policy is {@link + * KeepOnlyLastCommitDeletionPolicy}, which removes all + * prior commits when a new commit has completed. This + * matches the behavior before 2.2. + * + * Note that you must hold the write.lock before + * instantiating this class. It opens segments_N file(s) + * directly with no retry logic. 
+ */ + +final class IndexFileDeleter { + + /* Files that we tried to delete but failed (likely + * because they are open and we are running on Windows), + * so we will retry them again later: */ + private List deletable; + + /* Reference count for all files in the index. + * Counts how many existing commits reference a file. + **/ + private Map refCounts = new HashMap(); + + /* Holds all commits (segments_N) currently in the index. + * This will have just 1 commit if you are using the + * default delete policy (KeepOnlyLastCommitDeletionPolicy). + * Other policies may leave commit points live for longer + * in which case this list would be longer than 1: */ + private List commits = new ArrayList(); + + /* Holds files we had incref'd from the previous + * non-commit checkpoint: */ + private List> lastFiles = new ArrayList>(); + + /* Commits that the IndexDeletionPolicy has decided to delete: */ + private List commitsToDelete = new ArrayList(); + + private PrintStream infoStream; + private Directory directory; + private IndexDeletionPolicy policy; + + final boolean startingCommitDeleted; + private SegmentInfos lastSegmentInfos; + + /** Change to true to see details of reference counts when + * infoStream != null */ + public static boolean VERBOSE_REF_COUNTS = false; + + // Used only for assert + private final IndexWriter writer; + + void setInfoStream(PrintStream infoStream) { + this.infoStream = infoStream; + if (infoStream != null) { + message("setInfoStream deletionPolicy=" + policy); + } + } + + /* Prints a timestamped, thread-tagged line to infoStream. + * Dereferences infoStream without a null check, so callers + * must test infoStream != null first. */ + private void message(String message) { + infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); + } + + // called only from assert + private boolean locked() { + return writer == null || Thread.holdsLock(writer); + } + + /** + * Initialize the deleter: find all previous commits in + * the Directory, incref the files they reference, call + * the policy to let it delete commits. 
This will remove + * any files not referenced by any of the commits. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, IndexWriter writer) + throws CorruptIndexException, IOException { + + this.infoStream = infoStream; + this.writer = writer; + + final String currentSegmentsFile = segmentInfos.getCurrentSegmentFileName(); + + if (infoStream != null) { + message("init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy); + } + + this.policy = policy; + this.directory = directory; + + // First pass: walk the files and initialize our ref + // counts: + long currentGen = segmentInfos.getGeneration(); + IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); + + CommitPoint currentCommitPoint = null; + String[] files = null; + try { + files = directory.listAll(); + } catch (NoSuchDirectoryException e) { + // it means the directory is empty, so ignore it. + files = new String[0]; + } + + for (String fileName : files) { + + if (filter.accept(null, fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { + + // Add this file to refCounts with initial count 0: + getRefCount(fileName); + + if (fileName.startsWith(IndexFileNames.SEGMENTS)) { + + // This is a commit (segments or segments_N), and + // it's valid (<= the max gen). Load it, then + // incref all files it refers to: + if (infoStream != null) { + message("init: load commit \"" + fileName + "\""); + } + SegmentInfos sis = new SegmentInfos(); + try { + sis.read(directory, fileName); + } catch (FileNotFoundException e) { + // LUCENE-948: on NFS (and maybe others), if + // you have writers switching back and forth + // between machines, it's very likely that the + // dir listing will be stale and will claim a + // file segments_X exists when in fact it + // doesn't. 
So, we catch this and handle it + // as if the file does not exist + if (infoStream != null) { + message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point"); + } + sis = null; + } catch (IOException e) { + if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) { + throw e; + } else { + // Most likely we are opening an index that + // has an aborted "future" commit, so suppress + // exc in this case + sis = null; + } + } + if (sis != null) { + CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis); + if (sis.getGeneration() == segmentInfos.getGeneration()) { + currentCommitPoint = commitPoint; + } + commits.add(commitPoint); + incRef(sis, true); + + if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) { + lastSegmentInfos = sis; + } + } + } + } + } + + if (currentCommitPoint == null && currentSegmentsFile != null) { + // We did not in fact see the segments_N file + // corresponding to the segmentInfos that was passed + // in. Yet, it must exist, because our caller holds + // the write lock. This can happen when the directory + // listing was stale (eg when index accessed via NFS + // client with stale directory listing cache). So we + // try now to explicitly open this commit point: + SegmentInfos sis = new SegmentInfos(); + try { + sis.read(directory, currentSegmentsFile); + } catch (IOException e) { + throw new CorruptIndexException("failed to locate current segments_N file"); + } + if (infoStream != null) { + message("forced open of current segments file " + segmentInfos.getCurrentSegmentFileName()); + } + currentCommitPoint = new CommitPoint(commitsToDelete, directory, sis); + commits.add(currentCommitPoint); + incRef(sis, true); + } + + // We keep commits list in sorted order (oldest to newest): + CollectionUtil.mergeSort(commits); + + // Now delete anything with ref count at 0. 
These are + // presumably abandoned files eg due to crash of + // IndexWriter. + for(Map.Entry entry : refCounts.entrySet() ) { + RefCount rc = entry.getValue(); + final String fileName = entry.getKey(); + if (0 == rc.count) { + if (infoStream != null) { + message("init: removing unreferenced file \"" + fileName + "\""); + } + deleteFile(fileName); + } + } + + // Finally, give policy a chance to remove things on + // startup: + if (currentSegmentsFile != null) { + policy.onInit(commits); + } + + // Always protect the incoming segmentInfos since + // sometime it may not be the most recent commit + checkpoint(segmentInfos, false); + + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); + + deleteCommits(); + } + + public SegmentInfos getLastSegmentInfos() { + return lastSegmentInfos; + } + + /** + * Remove the CommitPoints in the commitsToDelete List by + * DecRef'ing all files from each SegmentInfos. + */ + private void deleteCommits() throws IOException { + + int size = commitsToDelete.size(); + + if (size > 0) { + + // First decref all files that had been referred to by + // the now-deleted commits: + for(int i=0;i writeTo) { + commits.remove(size-1); + size--; + } + } + } + + /** + * Writer calls this when it has hit an error and had to + * roll back, to tell us that there may now be + * unreferenced files in the filesystem. So we re-list + * the filesystem and delete such files. If segmentName + * is non-null, we will only delete files corresponding to + * that segment. 
+ */ + public void refresh(String segmentName) throws IOException { + assert locked(); + + String[] files = directory.listAll(); + IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); + String segmentPrefix1; + String segmentPrefix2; + if (segmentName != null) { + segmentPrefix1 = segmentName + "."; + segmentPrefix2 = segmentName + "_"; + } else { + segmentPrefix1 = null; + segmentPrefix2 = null; + } + + for(int i=0;i 0) { + for(int i=0;i 0) { + policy.onCommit(commits); + deleteCommits(); + } + } + + public void deletePendingFiles() throws IOException { + assert locked(); + if (deletable != null) { + List oldDeletable = deletable; + deletable = null; + int size = oldDeletable.size(); + for(int i=0;i lastFile : lastFiles) { + decRef(lastFile); + } + lastFiles.clear(); + + // Save files so we can decr on next checkpoint/commit: + lastFiles.add(segmentInfos.files(directory, false)); + } + } + + void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException { + assert locked(); + // If this is a commit point, also incRef the + // segments_N file: + for( final String fileName: segmentInfos.files(directory, isCommit) ) { + incRef(fileName); + } + } + + void incRef(Collection files) throws IOException { + assert locked(); + for(final String file : files) { + incRef(file); + } + } + + void incRef(String fileName) throws IOException { + assert locked(); + RefCount rc = getRefCount(fileName); + if (infoStream != null && VERBOSE_REF_COUNTS) { + message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count); + } + rc.IncRef(); + } + + void decRef(Collection files) throws IOException { + assert locked(); + for(final String file : files) { + decRef(file); + } + } + + void decRef(String fileName) throws IOException { + assert locked(); + RefCount rc = getRefCount(fileName); + if (infoStream != null && VERBOSE_REF_COUNTS) { + message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count); + } + if (0 == rc.DecRef()) { + // This file is no 
longer referenced by any past + // commit points nor by the in-memory SegmentInfos: + deleteFile(fileName); + refCounts.remove(fileName); + } + } + + void decRef(SegmentInfos segmentInfos) throws IOException { + assert locked(); + for (final String file : segmentInfos.files(directory, false)) { + decRef(file); + } + } + + public boolean exists(String fileName) { + assert locked(); + if (!refCounts.containsKey(fileName)) { + return false; + } else { + return getRefCount(fileName).count > 0; + } + } + + private RefCount getRefCount(String fileName) { + assert locked(); + RefCount rc; + if (!refCounts.containsKey(fileName)) { + rc = new RefCount(fileName); + refCounts.put(fileName, rc); + } else { + rc = refCounts.get(fileName); + } + return rc; + } + + void deleteFiles(List files) throws IOException { + assert locked(); + for(final String file: files) { + deleteFile(file); + } + } + + /** Deletes the specified files, but only if they are new + * (have not yet been incref'd). */ + void deleteNewFiles(Collection files) throws IOException { + assert locked(); + for (final String fileName: files) { + if (!refCounts.containsKey(fileName)) { + if (infoStream != null) { + message("delete new file \"" + fileName + "\""); + } + deleteFile(fileName); + } + } + } + + void deleteFile(String fileName) + throws IOException { + assert locked(); + try { + if (infoStream != null) { + message("delete \"" + fileName + "\""); + } + directory.deleteFile(fileName); + } catch (IOException e) { // if delete fails + if (directory.fileExists(fileName)) { + + // Some operating systems (e.g. Windows) don't + // permit a file to be deleted while it is opened + // for read (e.g. by another process or thread). So + // we assume that when a delete fails it is because + // the file is open in another process, and queue + // the file for subsequent deletion. 
+ + if (infoStream != null) { + message("unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later."); + } + if (deletable == null) { + deletable = new ArrayList(); + } + deletable.add(fileName); // add to deletable + } + } + } + + /** + * Tracks the reference count for a single index file: + */ + final private static class RefCount { + + // fileName used only for better assert error messages + final String fileName; + boolean initDone; + RefCount(String fileName) { + this.fileName = fileName; + } + + int count; + + public int IncRef() { + if (!initDone) { + initDone = true; + } else { + assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-increment for file \"" + fileName + "\""; + } + return ++count; + } + + public int DecRef() { + assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-decrement for file \"" + fileName + "\""; + return --count; + } + } + + /** + * Holds details for each commit point. This class is + * also passed to the deletion policy. Note: this class + * has a natural ordering that is inconsistent with + * equals. 
+ */ + + final private static class CommitPoint extends IndexCommit { + + Collection files; + String segmentsFileName; + boolean deleted; + Directory directory; + Collection commitsToDelete; + long version; + long generation; + final Map userData; + private final int segmentCount; + + /* Snapshots the segments file name, version, generation, + * user data, segment count and an unmodifiable view of the + * file list from the given segmentInfos. */ + public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { + this.directory = directory; + this.commitsToDelete = commitsToDelete; + userData = segmentInfos.getUserData(); + segmentsFileName = segmentInfos.getCurrentSegmentFileName(); + version = segmentInfos.getVersion(); + generation = segmentInfos.getGeneration(); + files = Collections.unmodifiableCollection(segmentInfos.files(directory, true)); + segmentCount = segmentInfos.size(); + } + + @Override + public String toString() { + return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")"; + } + + @Override + public int getSegmentCount() { + return segmentCount; + } + + @Override + public String getSegmentsFileName() { + return segmentsFileName; + } + + @Override + public Collection getFileNames() throws IOException { + return files; + } + + @Override + public Directory getDirectory() { + return directory; + } + + @Override + public long getVersion() { + return version; + } + + @Override + public long getGeneration() { + return generation; + } + + @Override + public Map getUserData() { + return userData; + } + + /** + * Called only by the deletion policy, to remove this + * commit point from the index. Marks this commit as + * deleted and adds it to commitsToDelete; calling it + * again on an already-deleted commit does nothing. + */ + @Override + public void delete() { + if (!deleted) { + deleted = true; + commitsToDelete.add(this); + } + } + + @Override + public boolean isDeleted() { + return deleted; + } + } +}