1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.store.Directory;
22 import java.io.IOException;
23 import java.io.FileNotFoundException;
24 import java.io.PrintStream;
27 import org.apache.lucene.store.NoSuchDirectoryException;
28 import org.apache.lucene.util.CollectionUtil;
31 * This class keeps track of each SegmentInfos instance that
32 * is still "live", either because it corresponds to a
33 * segments_N file in the Directory (a "commit", i.e. a
34 * committed SegmentInfos) or because it's an in-memory
35 * SegmentInfos that a writer is actively updating but has
36 * not yet committed. This class uses simple reference
37 * counting to map the live SegmentInfos instances to
38 * individual files in the Directory.
40 * The same directory file may be referenced by more than
41 * one IndexCommit, i.e. more than one SegmentInfos.
42 * Therefore we count how many commits reference each file.
43 * When all the commits referencing a certain file have been
44 * deleted, the refcount for that file becomes zero, and the file can then be deleted.
47 * A separate deletion policy interface
48 * (IndexDeletionPolicy) is consulted on creation (onInit)
49 * and once per commit (onCommit), to decide when a commit should be removed.
52 * It is the business of the IndexDeletionPolicy to choose
53 * when to delete commit points. The actual mechanics of
54 * file deletion, retrying, etc, derived from the deletion
55 * of commit points is the business of the IndexFileDeleter.
57 * The current default deletion policy is {@link
58 * KeepOnlyLastCommitDeletionPolicy}, which removes all
59 * prior commits when a new commit has completed. This
60 * matches the behavior before 2.2.
62 * Note that you must hold the write.lock before
63 * instantiating this class. It opens segments_N file(s)
64 * directly with no retry logic.
67 final class IndexFileDeleter {
69 /* Files that we tried to delete but failed (likely
70 * because they are open and we are running on Windows),
71 * so we will retry them again later: */
72 private List<String> deletable;
74 /* Reference count for all files in the index.
75 * Counts how many existing commits reference a file.
77 private Map<String, RefCount> refCounts = new HashMap<String, RefCount>();
79 /* Holds all commits (segments_N) currently in the index.
80 * This will have just 1 commit if you are using the
81 * default delete policy (KeepOnlyLastCommitDeletionPolicy).
82 * Other policies may leave commit points live for longer
83 * in which case this list would be longer than 1: */
84 private List<CommitPoint> commits = new ArrayList<CommitPoint>();
86 /* Holds files we had incref'd from the previous
87 * non-commit checkpoint: */
88 private List<Collection<String>> lastFiles = new ArrayList<Collection<String>>();
90 /* Commits that the IndexDeletionPolicy has decided to delete: */
91 private List<CommitPoint> commitsToDelete = new ArrayList<CommitPoint>();
// Destination for diagnostic output; message() prints to it, and the call
// sites in this class test infoStream != null before logging.
93 private PrintStream infoStream;
// Directory whose files this class lists, reads commits from, and deletes.
94 private Directory directory;
// Deletion policy; it is handed the list of known commits via onInit()/onCommit().
95 private IndexDeletionPolicy policy;
// Assigned once at the end of the constructor: whether the commit point that
// matches the incoming SegmentInfos reports isDeleted() (false if there is none).
97 final boolean startingCommitDeleted;
// Among the commits loaded by the constructor's directory scan, the
// SegmentInfos with the highest generation.
98 private SegmentInfos lastSegmentInfos;
100 /** Change to true to see details of reference counts when
101 * infoStream != null */
102 public static boolean VERBOSE_REF_COUNTS = false;
104 // Used only for assert
105 private final IndexWriter writer;
// Installs the stream used for diagnostic output. When the new stream is
// non-null, immediately logs which deletion policy is in use.
107 void setInfoStream(PrintStream infoStream) {
108 this.infoStream = infoStream;
109 if (infoStream != null) {
110 message("setInfoStream deletionPolicy=" + policy);
// Prints one diagnostic line to infoStream, prefixed with "IFD", the current
// date and the calling thread's name. This method does not null-check
// infoStream itself; the callers in this class do so before calling it.
114 private void message(String message) {
115 infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
118 // called only from assert
// Returns true when no writer was supplied (writer == null) or when the
// current thread holds the writer's monitor.
119 private boolean locked() {
120 return writer == null || Thread.holdsLock(writer);
124 * Initialize the deleter: find all previous commits in
125 * the Directory, incref the files they reference, call
126 * the policy to let it delete commits. This will remove
127 * any files not referenced by any of the commits.
128 * @throws CorruptIndexException if the index is corrupt
129 * @throws IOException if there is a low-level IO error
// Builds the deleter's initial state from the directory contents:
//  1. lists the directory (a missing directory is treated as an empty listing);
//  2. registers every accepted index file (except segments.gen) in refCounts
//     and attempts to load each segments* file as a commit point;
//  3. if the commit matching the incoming segmentInfos was not seen in the
//     listing, reads it explicitly by name;
//  4. sorts the commits, removes unreferenced files, lets the policy run its
//     onInit hook, and checkpoints the incoming segmentInfos.
131 public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, IndexWriter writer)
132 throws CorruptIndexException, IOException {
134 this.infoStream = infoStream;
135 this.writer = writer;
137 final String currentSegmentsFile = segmentInfos.getCurrentSegmentFileName();
139 if (infoStream != null) {
// Note: "policy" here is the constructor parameter; the field is assigned below.
140 message("init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy);
143 this.policy = policy;
144 this.directory = directory;
146 // First pass: walk the files and initialize our ref counts:
148 long currentGen = segmentInfos.getGeneration();
149 IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
151 CommitPoint currentCommitPoint = null;
152 String[] files = null;
154 files = directory.listAll();
155 } catch (NoSuchDirectoryException e) {
156 // it means the directory is empty, so ignore it.
157 files = new String[0];
160 for (String fileName : files) {
162 if (filter.accept(null, fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
164 // Add this file to refCounts with initial count 0:
165 getRefCount(fileName);
167 if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
169 // This is a commit (segments or segments_N), and
170 // it's valid (<= the max gen). Load it, then
171 // incref all files it refers to:
172 if (infoStream != null) {
173 message("init: load commit \"" + fileName + "\"");
175 SegmentInfos sis = new SegmentInfos();
177 sis.read(directory, fileName);
178 } catch (FileNotFoundException e) {
179 // LUCENE-948: on NFS (and maybe others), if
180 // you have writers switching back and forth
181 // between machines, it's very likely that the
182 // dir listing will be stale and will claim a
183 // file segments_X exists when in fact it
184 // doesn't. So, we catch this and handle it
185 // as if the file does not exist
186 if (infoStream != null) {
187 message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
190 } catch (IOException e) {
// Distinguishes a failure on a commit at or below the current generation
// from one on a newer ("future") segments file.
191 if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) {
194 // Most likely we are opening an index that
195 // has an aborted "future" commit, so suppress
201 CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
// Remember the commit whose generation matches the incoming segmentInfos.
202 if (sis.getGeneration() == segmentInfos.getGeneration()) {
203 currentCommitPoint = commitPoint;
205 commits.add(commitPoint);
// Track the loaded SegmentInfos with the highest generation seen so far.
208 if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
209 lastSegmentInfos = sis;
216 if (currentCommitPoint == null && currentSegmentsFile != null) {
217 // We did not in fact see the segments_N file
218 // corresponding to the segmentInfos that was passed
219 // in. Yet, it must exist, because our caller holds
220 // the write lock. This can happen when the directory
221 // listing was stale (eg when index accessed via NFS
222 // client with stale directory listing cache). So we
223 // try now to explicitly open this commit point:
224 SegmentInfos sis = new SegmentInfos();
226 sis.read(directory, currentSegmentsFile);
227 } catch (IOException e) {
// Any read failure here is reported as index corruption.
228 throw new CorruptIndexException("failed to locate current segments_N file");
230 if (infoStream != null) {
231 message("forced open of current segments file " + segmentInfos.getCurrentSegmentFileName());
233 currentCommitPoint = new CommitPoint(commitsToDelete, directory, sis);
234 commits.add(currentCommitPoint);
238 // We keep commits list in sorted order (oldest to newest):
239 CollectionUtil.mergeSort(commits);
241 // Now delete anything with ref count at 0. These are
242 // presumably abandoned files, e.g. left over after a crash.
244 for(Map.Entry<String, RefCount> entry : refCounts.entrySet() ) {
245 RefCount rc = entry.getValue();
246 final String fileName = entry.getKey();
248 if (infoStream != null) {
249 message("init: removing unreferenced file \"" + fileName + "\"");
251 deleteFile(fileName);
255 // Finally, give policy a chance to remove things on init:
// The policy is only consulted when there is a current segments file.
257 if (currentSegmentsFile != null) {
258 policy.onInit(commits);
261 // Always protect the incoming segmentInfos since
262 // sometimes it may not be the most recent commit
263 checkpoint(segmentInfos, false);
// Evaluated after policy.onInit, so it reflects the policy's decision about
// the starting commit.
265 startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted();
// Returns the highest-generation SegmentInfos recorded by the constructor's
// directory scan; null if that scan recorded no commit.
270 public SegmentInfos getLastSegmentInfos() {
271 return lastSegmentInfos;
275 * Remove the CommitPoints in the commitsToDelete List by
276 * DecRef'ing all files from each SegmentInfos.
// Processes the commits queued in commitsToDelete: releases the files of each
// queued commit, empties the queue, then compacts the commits list in place so
// that only non-deleted commits remain, in their existing order.
278 private void deleteCommits() throws IOException {
280 int size = commitsToDelete.size();
284 // First decref all files that had been referred to by
285 // the now-deleted commits:
286 for(int i=0;i<size;i++) {
287 CommitPoint commit = commitsToDelete.get(i);
288 if (infoStream != null) {
289 message("deleteCommits: now decRef commit \"" + commit.getSegmentsFileName() + "\"");
291 for (final String file : commit.files) {
295 commitsToDelete.clear();
297 // Now compact commits to remove deleted ones (preserving the sort):
298 size = commits.size();
// Two-index compaction: readFrom scans every entry, writeTo marks where the
// next surviving (non-deleted) commit is stored.
301 while(readFrom < size) {
302 CommitPoint commit = commits.get(readFrom);
303 if (!commit.deleted) {
// Skip the self-assignment while no deleted commit has been passed yet.
304 if (writeTo != readFrom) {
305 commits.set(writeTo, commits.get(readFrom));
// Trim the leftover tail, removing from the end of the list.
312 while(size > writeTo) {
313 commits.remove(size-1);
320 * Writer calls this when it has hit an error and had to
321 * roll back, to tell us that there may now be
322 * unreferenced files in the filesystem. So we re-list
323 * the filesystem and delete such files. If segmentName
324 * is non-null, we will only delete files corresponding to that segment.
// Re-lists the directory and deletes index files that are not tracked in
// refCounts. With a non-null segmentName, only files whose name starts with
// segmentName + "." or segmentName + "_" are considered; with null, every
// accepted file is considered. segments.gen is never deleted here.
327 public void refresh(String segmentName) throws IOException {
330 String[] files = directory.listAll();
331 IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
332 String segmentPrefix1;
333 String segmentPrefix2;
334 if (segmentName != null) {
335 segmentPrefix1 = segmentName + ".";
336 segmentPrefix2 = segmentName + "_";
338 segmentPrefix1 = null;
339 segmentPrefix2 = null;
342 for(int i=0;i<files.length;i++) {
343 String fileName = files[i];
// The segmentName == null test short-circuits, so the null prefixes are
// never passed to startsWith.
344 if (filter.accept(null, fileName) &&
345 (segmentName == null || fileName.startsWith(segmentPrefix1) || fileName.startsWith(segmentPrefix2)) &&
346 !refCounts.containsKey(fileName) &&
347 !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
348 // Unreferenced file, so remove it
349 if (infoStream != null) {
350 message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
352 deleteFile(fileName);
// No-argument overload of refresh(String).
// NOTE(review): presumably resets the pending-deletion list and then refreshes
// without a segment filter -- confirm against the full method body.
357 public void refresh() throws IOException {
358 // Set to null so that we regenerate the list of pending
359 // files; else we can accumulate the same file more than once
// Releases the references held since the last checkpoint (each collection in
// lastFiles is decRef'd) and then retries any deletions still pending.
366 public void close() throws IOException {
367 // DecRef old files from the last checkpoint, if any:
369 int size = lastFiles.size();
371 for(int i=0;i<size;i++) {
372 decRef(lastFiles.get(i));
377 deletePendingFiles();
381 * Revisits the {@link IndexDeletionPolicy} by calling its
382 * {@link IndexDeletionPolicy#onCommit(List)} again with the known commits.
383 * This is useful in cases where a deletion policy which holds onto index
384 * commits is used. The application may know that some commits are not held by
385 * the deletion policy anymore and call
386 * {@link IndexWriter#deleteUnusedFiles()}, which will attempt to delete the
387 * unused commits again.
// Hands the current commits list to the policy's onCommit hook again.
389 void revisitPolicy() throws IOException {
391 if (infoStream != null) {
392 message("now revisitPolicy");
// The policy is not called when there are no commits at all.
395 if (commits.size() > 0) {
396 policy.onCommit(commits);
// Retries deleteFile for every file name queued in deletable. Does nothing
// when deletable is null.
401 public void deletePendingFiles() throws IOException {
403 if (deletable != null) {
// Iterate through a separate reference to the queued list; deleteFile may add
// to the deletable field again when a delete fails.
// NOTE(review): presumably the field is reset before this loop -- confirm.
404 List<String> oldDeletable = deletable;
406 int size = oldDeletable.size();
407 for(int i=0;i<size;i++) {
408 if (infoStream != null) {
409 message("delete pending file " + oldDeletable.get(i));
411 deleteFile(oldDeletable.get(i));
417 * For definition of "check point" see IndexWriter comments:
418 * "Clarification: Check Points (and commits)".
420 * Writer calls this when it has made a "consistent
421 * change" to the index, meaning new files are written to
422 * the index and the in-memory SegmentInfos have been
423 * modified to point to those files.
425 * This may or may not be a commit (segments_N may or may
426 * not have been written).
428 * We simply incref the files referenced by the new
429 * SegmentInfos and decref the files we had previously incref'd at the last checkpoint (if any).
432 * If this is a commit, we also call the policy to give it
433 * a chance to remove other commits. If any commits are
434 * removed, we decref their files as well.
// Records a new consistent state of the index: retries pending deletions,
// takes references on the files of segmentInfos, performs the commit
// bookkeeping (new CommitPoint, policy.onCommit), releases the files saved at
// the previous checkpoint, and saves the current files for the next one.
436 public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
439 if (infoStream != null) {
440 message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
443 // Try again now to delete any previously un-deletable
444 // files (because they were in use, on Windows):
445 deletePendingFiles();
// References on the new state are taken before any old ones are released
// below, so a file present in both never reaches a count of zero in between.
448 incRef(segmentInfos, isCommit);
451 // Append to our commits list:
452 commits.add(new CommitPoint(commitsToDelete, directory, segmentInfos));
454 // Tell policy so it can remove commits:
455 policy.onCommit(commits);
457 // Decref files for commits that were deleted by the policy:
460 // DecRef old files from the last checkpoint, if any:
461 for (Collection<String> lastFile : lastFiles) {
466 // Save files so we can decr on next checkpoint/commit:
467 lastFiles.add(segmentInfos.files(directory, false));
// Takes a reference on each file name returned by
// segmentInfos.files(directory, isCommit).
471 void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
473 // If this is a commit point, also incRef the
475 for( final String fileName: segmentInfos.files(directory, isCommit) ) {
// Collection overload of incRef: walks every file name in the given collection.
480 void incRef(Collection<String> files) throws IOException {
482 for(final String file : files) {
// Single-file overload of incRef. Looks up the RefCount for fileName
// (getRefCount creates one if the file is not yet tracked).
487 void incRef(String fileName) throws IOException {
489 RefCount rc = getRefCount(fileName);
// Per-file tracing is emitted only when both an infoStream is set and
// VERBOSE_REF_COUNTS is enabled.
490 if (infoStream != null && VERBOSE_REF_COUNTS) {
491 message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
// Collection overload of decRef: walks every file name in the given collection.
496 void decRef(Collection<String> files) throws IOException {
498 for(final String file : files) {
// Drops one reference to fileName. When RefCount.DecRef() returns 0 the file is
// deleted from the directory and its entry is removed from refCounts.
503 void decRef(String fileName) throws IOException {
505 RefCount rc = getRefCount(fileName);
506 if (infoStream != null && VERBOSE_REF_COUNTS) {
507 message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
509 if (0 == rc.DecRef()) {
510 // This file is no longer referenced by any past
511 // commit points nor by the in-memory SegmentInfos:
512 deleteFile(fileName);
// Forget the entry so the map only tracks files that are still referenced.
513 refCounts.remove(fileName);
// SegmentInfos overload of decRef: walks the file names returned by
// segmentInfos.files(directory, false).
517 void decRef(SegmentInfos segmentInfos) throws IOException {
519 for (final String file : segmentInfos.files(directory, false)) {
// Reports whether fileName is tracked with a reference count above zero.
524 public boolean exists(String fileName) {
// Check the map directly first: getRefCount would register a new entry for an
// unknown name, which a pure query must not do.
526 if (!refCounts.containsKey(fileName)) {
529 return getRefCount(fileName).count > 0;
// Looks up the RefCount for fileName. If the map has no entry yet, a new
// RefCount is created and stored under that name.
533 private RefCount getRefCount(String fileName) {
536 if (!refCounts.containsKey(fileName)) {
537 rc = new RefCount(fileName);
538 refCounts.put(fileName, rc);
540 rc = refCounts.get(fileName);
// Walks the given list of file names.
// NOTE(review): presumably each one is passed to deleteFile -- confirm against
// the loop body.
545 void deleteFiles(List<String> files) throws IOException {
547 for(final String file: files) {
552 /** Deletes the specified files, but only if they are new
553 * (have not yet been incref'd). */
554 void deleteNewFiles(Collection<String> files) throws IOException {
556 for (final String fileName: files) {
// "New" is decided by absence from refCounts: a tracked file is left alone.
557 if (!refCounts.containsKey(fileName)) {
558 if (infoStream != null) {
559 message("delete new file \"" + fileName + "\"");
561 deleteFile(fileName);
// Asks the directory to delete fileName. If that fails with an IOException and
// the file is still present, the name is queued in deletable so the delete can
// be retried later (see deletePendingFiles).
566 void deleteFile(String fileName)
570 if (infoStream != null) {
571 message("delete \"" + fileName + "\"");
573 directory.deleteFile(fileName);
574 } catch (IOException e) { // if delete fails
// Only a file that still exists is queued for a retry.
575 if (directory.fileExists(fileName)) {
577 // Some operating systems (e.g. Windows) don't
578 // permit a file to be deleted while it is opened
579 // for read (e.g. by another process or thread). So
580 // we assume that when a delete fails it is because
581 // the file is open in another process, and queue
582 // the file for subsequent deletion.
584 if (infoStream != null) {
585 message("unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
// The retry list is created lazily, on the first failed delete.
587 if (deletable == null) {
588 deletable = new ArrayList<String>();
590 deletable.add(fileName); // add to deletable
596 * Tracks the reference count for a single index file:
// Per-file reference counter. The enclosing class stores one instance per
// tracked file name in refCounts and reads its count field directly.
598 final private static class RefCount {
600 // fileName used only for better assert error messages
601 final String fileName;
603 RefCount(String fileName) {
604 this.fileName = fileName;
// NOTE(review): decRef(String) compares the result of DecRef() with 0, so both
// methods presumably return the updated count -- confirm against the bodies.
609 public int IncRef() {
613 assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-increment for file \"" + fileName + "\"";
618 public int DecRef() {
// Decrementing a counter that is already at zero is a caller bug.
619 assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-decrement for file \"" + fileName + "\"";
625 * Holds details for each commit point. This class is
626 * also passed to the deletion policy. Note: this class
627 * has a natural ordering that is inconsistent with equals.
631 final private static class CommitPoint extends IndexCommit {
633 Collection<String> files;
634 String segmentsFileName;
637 Collection<CommitPoint> commitsToDelete;
640 final boolean isOptimized;
641 final Map<String,String> userData;
// Captures a snapshot of segmentInfos as a commit point: its user data,
// segments file name, version, generation and file list are copied into
// fields at construction time. commitsToDelete is the collection this commit
// adds itself to when delete() is called.
643 public CommitPoint(Collection<CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException {
644 this.directory = directory;
645 this.commitsToDelete = commitsToDelete;
646 userData = segmentInfos.getUserData();
647 segmentsFileName = segmentInfos.getCurrentSegmentFileName();
648 version = segmentInfos.getVersion();
649 generation = segmentInfos.getGeneration();
// Wrapped so that holders of this commit cannot modify the file list.
650 files = Collections.unmodifiableCollection(segmentInfos.files(directory, true));
// "Optimized" here means exactly one segment, and that segment has no deletions.
651 isOptimized = segmentInfos.size() == 1 && !segmentInfos.info(0).hasDeletions();
// Identifies this commit point by its segments file name.
655 public String toString() {
656 return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")";
660 public boolean isOptimized() {
// Returns the segments file name captured from the SegmentInfos at construction.
665 public String getSegmentsFileName() {
666 return segmentsFileName;
670 public Collection<String> getFileNames() throws IOException {
675 public Directory getDirectory() {
680 public long getVersion() {
685 public long getGeneration() {
690 public Map<String,String> getUserData() {
695 * Called only by the deletion policy, to remove this
696 * commit point from the index.
// Queues this commit point in the commitsToDelete collection supplied at
// construction; the files are not touched here (deleteCommits in the
// enclosing class processes that queue).
699 public void delete() {
702 commitsToDelete.add(this);
707 public boolean isDeleted() {