1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.CollectionUtil;
36 * This class keeps track of each SegmentInfos instance that
37 * is still "live", either because it corresponds to a
38 * segments_N file in the Directory (a "commit", i.e. a
39 * committed SegmentInfos) or because it's an in-memory
40 * SegmentInfos that a writer is actively updating but has
41 * not yet committed. This class uses simple reference
42 * counting to map the live SegmentInfos instances to
43 * individual files in the Directory.
 * The same directory file may be referenced by more than
 * one IndexCommit, i.e. more than one SegmentInfos.
 * Therefore we count how many commits reference each file.
 * When all the commits referencing a certain file have been
 * deleted, the refcount for that file becomes zero, and the
 * file is deleted.
 * A separate deletion policy interface
 * (IndexDeletionPolicy) is consulted on creation (onInit)
 * and once per commit (onCommit), to decide when a commit
 * should be removed.
57 * It is the business of the IndexDeletionPolicy to choose
58 * when to delete commit points. The actual mechanics of
59 * file deletion, retrying, etc, derived from the deletion
60 * of commit points is the business of the IndexFileDeleter.
62 * The current default deletion policy is {@link
63 * KeepOnlyLastCommitDeletionPolicy}, which removes all
64 * prior commits when a new commit has completed. This
65 * matches the behavior before 2.2.
67 * Note that you must hold the write.lock before
68 * instantiating this class. It opens segments_N file(s)
69 * directly with no retry logic.
72 final class IndexFileDeleter {
74 /* Files that we tried to delete but failed (likely
75 * because they are open and we are running on Windows),
76 * so we will retry them again later: */
77 private List<String> deletable;
79 /* Reference count for all files in the index.
80 * Counts how many existing commits reference a file.
82 private Map<String, RefCount> refCounts = new HashMap<String, RefCount>();
84 /* Holds all commits (segments_N) currently in the index.
85 * This will have just 1 commit if you are using the
86 * default delete policy (KeepOnlyLastCommitDeletionPolicy).
87 * Other policies may leave commit points live for longer
88 * in which case this list would be longer than 1: */
89 private List<CommitPoint> commits = new ArrayList<CommitPoint>();
91 /* Holds files we had incref'd from the previous
92 * non-commit checkpoint: */
93 private List<Collection<String>> lastFiles = new ArrayList<Collection<String>>();
95 /* Commits that the IndexDeletionPolicy have decided to delete: */
96 private List<CommitPoint> commitsToDelete = new ArrayList<CommitPoint>();
98 private PrintStream infoStream;
99 private Directory directory;
100 private IndexDeletionPolicy policy;
102 final boolean startingCommitDeleted;
103 private SegmentInfos lastSegmentInfos;
105 /** Change to true to see details of reference counts when
106 * infoStream != null */
107 public static boolean VERBOSE_REF_COUNTS = false;
109 // Used only for assert
110 private final IndexWriter writer;
112 void setInfoStream(PrintStream infoStream) {
113 this.infoStream = infoStream;
114 if (infoStream != null) {
115 message("setInfoStream deletionPolicy=" + policy);
119 private void message(String message) {
120 infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
123 // called only from assert
124 private boolean locked() {
125 return writer == null || Thread.holdsLock(writer);
/**
 * Initialize the deleter: find all previous commits in
 * the Directory, incref the files they reference, call
 * the policy to let it delete commits.  This will remove
 * any files not referenced by any of the commits.
 *
 * @param directory    directory holding the index files
 * @param policy       deletion policy consulted via onInit / onCommit
 * @param segmentInfos the SegmentInfos the caller is working with; its
 *                     files are always protected via a non-commit checkpoint
 * @param infoStream   destination for diagnostic messages, or null
 * @param writer       used only by assertions (may be null)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, IndexWriter writer)
  throws CorruptIndexException, IOException {

  this.infoStream = infoStream;
  this.writer = writer;

  final String currentSegmentsFile = segmentInfos.getCurrentSegmentFileName();

  if (infoStream != null) {
    message("init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy);
  }

  this.policy = policy;
  this.directory = directory;

  // First pass: walk the files and initialize our ref
  // counts:
  long currentGen = segmentInfos.getGeneration();
  IndexFileNameFilter filter = IndexFileNameFilter.getFilter();

  CommitPoint currentCommitPoint = null;
  String[] files = null;
  try {
    files = directory.listAll();
  } catch (NoSuchDirectoryException e) {
    // it means the directory is empty, so ignore it.
    files = new String[0];
  }

  for (String fileName : files) {

    if (filter.accept(null, fileName) && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {

      // Add this file to refCounts with initial count 0:
      getRefCount(fileName);

      if (fileName.startsWith(IndexFileNames.SEGMENTS)) {

        // This is a commit (segments or segments_N), and
        // it's valid (<= the max gen).  Load it, then
        // incref all files it refers to:
        if (infoStream != null) {
          message("init: load commit \"" + fileName + "\"");
        }
        SegmentInfos sis = new SegmentInfos();
        try {
          sis.read(directory, fileName);
        } catch (FileNotFoundException e) {
          // LUCENE-948: on NFS (and maybe others), if
          // you have writers switching back and forth
          // between machines, it's very likely that the
          // dir listing will be stale and will claim a
          // file segments_X exists when in fact it
          // doesn't.  So, we catch this and handle it
          // as if the file does not exist
          if (infoStream != null) {
            message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
          }
          sis = null;
        } catch (IOException e) {
          if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) {
            throw e;
          } else {
            // Most likely we are opening an index that
            // has an aborted "future" commit, so suppress
            // exc in this case
            sis = null;
          }
        }
        if (sis != null) {
          CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
          if (sis.getGeneration() == segmentInfos.getGeneration()) {
            currentCommitPoint = commitPoint;
          }
          commits.add(commitPoint);
          incRef(sis, true);

          if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
            lastSegmentInfos = sis;
          }
        }
      }
    }
  }

  if (currentCommitPoint == null && currentSegmentsFile != null) {
    // We did not in fact see the segments_N file
    // corresponding to the segmentInfos that was passed
    // in.  Yet, it must exist, because our caller holds
    // the write lock.  This can happen when the directory
    // listing was stale (eg when index accessed via NFS
    // client with stale directory listing cache).  So we
    // try now to explicitly open this commit point:
    SegmentInfos sis = new SegmentInfos();
    try {
      sis.read(directory, currentSegmentsFile);
    } catch (IOException e) {
      // Keep the underlying failure attached so it is not lost:
      CorruptIndexException corrupt = new CorruptIndexException("failed to locate current segments_N file");
      corrupt.initCause(e);
      throw corrupt;
    }
    if (infoStream != null) {
      message("forced open of current segments file " + segmentInfos.getCurrentSegmentFileName());
    }
    currentCommitPoint = new CommitPoint(commitsToDelete, directory, sis);
    commits.add(currentCommitPoint);
    incRef(sis, true);
  }

  // We keep commits list in sorted order (oldest to newest):
  CollectionUtil.mergeSort(commits);

  // Now delete anything with ref count at 0.  These are
  // presumably abandoned files eg due to crash of
  // IndexWriter.
  for(Map.Entry<String, RefCount> entry : refCounts.entrySet() ) {
    RefCount rc = entry.getValue();
    final String fileName = entry.getKey();
    if (0 == rc.count) {
      if (infoStream != null) {
        message("init: removing unreferenced file \"" + fileName + "\"");
      }
      deleteFile(fileName);
    }
  }

  // Finally, give policy a chance to remove things on
  // startup:
  if (currentSegmentsFile != null) {
    policy.onInit(commits);
  }

  // Always protect the incoming segmentInfos since
  // sometime it may not be the most recent commit
  checkpoint(segmentInfos, false);

  startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted();

  deleteCommits();
}
275 public SegmentInfos getLastSegmentInfos() {
276 return lastSegmentInfos;
280 * Remove the CommitPoints in the commitsToDelete List by
281 * DecRef'ing all files from each SegmentInfos.
283 private void deleteCommits() throws IOException {
285 int size = commitsToDelete.size();
289 // First decref all files that had been referred to by
290 // the now-deleted commits:
291 for(int i=0;i<size;i++) {
292 CommitPoint commit = commitsToDelete.get(i);
293 if (infoStream != null) {
294 message("deleteCommits: now decRef commit \"" + commit.getSegmentsFileName() + "\"");
296 for (final String file : commit.files) {
300 commitsToDelete.clear();
302 // Now compact commits to remove deleted ones (preserving the sort):
303 size = commits.size();
306 while(readFrom < size) {
307 CommitPoint commit = commits.get(readFrom);
308 if (!commit.deleted) {
309 if (writeTo != readFrom) {
310 commits.set(writeTo, commits.get(readFrom));
317 while(size > writeTo) {
318 commits.remove(size-1);
325 * Writer calls this when it has hit an error and had to
326 * roll back, to tell us that there may now be
327 * unreferenced files in the filesystem. So we re-list
328 * the filesystem and delete such files. If segmentName
329 * is non-null, we will only delete files corresponding to
332 public void refresh(String segmentName) throws IOException {
335 String[] files = directory.listAll();
336 IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
337 String segmentPrefix1;
338 String segmentPrefix2;
339 if (segmentName != null) {
340 segmentPrefix1 = segmentName + ".";
341 segmentPrefix2 = segmentName + "_";
343 segmentPrefix1 = null;
344 segmentPrefix2 = null;
347 for(int i=0;i<files.length;i++) {
348 String fileName = files[i];
349 if (filter.accept(null, fileName) &&
350 (segmentName == null || fileName.startsWith(segmentPrefix1) || fileName.startsWith(segmentPrefix2)) &&
351 !refCounts.containsKey(fileName) &&
352 !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
353 // Unreferenced file, so remove it
354 if (infoStream != null) {
355 message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
357 deleteFile(fileName);
362 public void refresh() throws IOException {
363 // Set to null so that we regenerate the list of pending
364 // files; else we can accumulate same file more than
371 public void close() throws IOException {
372 // DecRef old files from the last checkpoint, if any:
374 int size = lastFiles.size();
376 for(int i=0;i<size;i++) {
377 decRef(lastFiles.get(i));
382 deletePendingFiles();
386 * Revisits the {@link IndexDeletionPolicy} by calling its
387 * {@link IndexDeletionPolicy#onCommit(List)} again with the known commits.
388 * This is useful in cases where a deletion policy which holds onto index
389 * commits is used. The application may know that some commits are not held by
390 * the deletion policy anymore and call
391 * {@link IndexWriter#deleteUnusedFiles()}, which will attempt to delete the
392 * unused commits again.
394 void revisitPolicy() throws IOException {
396 if (infoStream != null) {
397 message("now revisitPolicy");
400 if (commits.size() > 0) {
401 policy.onCommit(commits);
406 public void deletePendingFiles() throws IOException {
408 if (deletable != null) {
409 List<String> oldDeletable = deletable;
411 int size = oldDeletable.size();
412 for(int i=0;i<size;i++) {
413 if (infoStream != null) {
414 message("delete pending file " + oldDeletable.get(i));
416 deleteFile(oldDeletable.get(i));
422 * For definition of "check point" see IndexWriter comments:
423 * "Clarification: Check Points (and commits)".
425 * Writer calls this when it has made a "consistent
426 * change" to the index, meaning new files are written to
427 * the index and the in-memory SegmentInfos have been
428 * modified to point to those files.
430 * This may or may not be a commit (segments_N may or may
431 * not have been written).
433 * We simply incref the files referenced by the new
434 * SegmentInfos and decref the files we had previously
437 * If this is a commit, we also call the policy to give it
438 * a chance to remove other commits. If any commits are
439 * removed, we decref their files as well.
441 public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
444 if (infoStream != null) {
445 message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
448 // Try again now to delete any previously un-deletable
449 // files (because they were in use, on Windows):
450 deletePendingFiles();
453 incRef(segmentInfos, isCommit);
456 // Append to our commits list:
457 commits.add(new CommitPoint(commitsToDelete, directory, segmentInfos));
459 // Tell policy so it can remove commits:
460 policy.onCommit(commits);
462 // Decref files for commits that were deleted by the policy:
465 // DecRef old files from the last checkpoint, if any:
466 for (Collection<String> lastFile : lastFiles) {
471 // Save files so we can decr on next checkpoint/commit:
472 lastFiles.add(segmentInfos.files(directory, false));
476 void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
478 // If this is a commit point, also incRef the
480 for( final String fileName: segmentInfos.files(directory, isCommit) ) {
485 void incRef(Collection<String> files) throws IOException {
487 for(final String file : files) {
492 void incRef(String fileName) throws IOException {
494 RefCount rc = getRefCount(fileName);
495 if (infoStream != null && VERBOSE_REF_COUNTS) {
496 message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
501 void decRef(Collection<String> files) throws IOException {
503 for(final String file : files) {
508 void decRef(String fileName) throws IOException {
510 RefCount rc = getRefCount(fileName);
511 if (infoStream != null && VERBOSE_REF_COUNTS) {
512 message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
514 if (0 == rc.DecRef()) {
515 // This file is no longer referenced by any past
516 // commit points nor by the in-memory SegmentInfos:
517 deleteFile(fileName);
518 refCounts.remove(fileName);
522 void decRef(SegmentInfos segmentInfos) throws IOException {
524 for (final String file : segmentInfos.files(directory, false)) {
529 public boolean exists(String fileName) {
531 if (!refCounts.containsKey(fileName)) {
534 return getRefCount(fileName).count > 0;
538 private RefCount getRefCount(String fileName) {
541 if (!refCounts.containsKey(fileName)) {
542 rc = new RefCount(fileName);
543 refCounts.put(fileName, rc);
545 rc = refCounts.get(fileName);
550 void deleteFiles(List<String> files) throws IOException {
552 for(final String file: files) {
557 /** Deletes the specified files, but only if they are new
558 * (have not yet been incref'd). */
559 void deleteNewFiles(Collection<String> files) throws IOException {
561 for (final String fileName: files) {
562 if (!refCounts.containsKey(fileName)) {
563 if (infoStream != null) {
564 message("delete new file \"" + fileName + "\"");
566 deleteFile(fileName);
571 void deleteFile(String fileName)
575 if (infoStream != null) {
576 message("delete \"" + fileName + "\"");
578 directory.deleteFile(fileName);
579 } catch (IOException e) { // if delete fails
580 if (directory.fileExists(fileName)) {
582 // Some operating systems (e.g. Windows) don't
583 // permit a file to be deleted while it is opened
584 // for read (e.g. by another process or thread). So
585 // we assume that when a delete fails it is because
586 // the file is open in another process, and queue
587 // the file for subsequent deletion.
589 if (infoStream != null) {
590 message("unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
592 if (deletable == null) {
593 deletable = new ArrayList<String>();
595 deletable.add(fileName); // add to deletable
601 * Tracks the reference count for a single index file:
603 final private static class RefCount {
605 // fileName used only for better assert error messages
606 final String fileName;
608 RefCount(String fileName) {
609 this.fileName = fileName;
614 public int IncRef() {
618 assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-increment for file \"" + fileName + "\"";
623 public int DecRef() {
624 assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-decrement for file \"" + fileName + "\"";
630 * Holds details for each commit point. This class is
631 * also passed to the deletion policy. Note: this class
632 * has a natural ordering that is inconsistent with
636 final private static class CommitPoint extends IndexCommit {
638 Collection<String> files;
639 String segmentsFileName;
642 Collection<CommitPoint> commitsToDelete;
645 final Map<String,String> userData;
646 private final int segmentCount;
648 public CommitPoint(Collection<CommitPoint> commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException {
649 this.directory = directory;
650 this.commitsToDelete = commitsToDelete;
651 userData = segmentInfos.getUserData();
652 segmentsFileName = segmentInfos.getCurrentSegmentFileName();
653 version = segmentInfos.getVersion();
654 generation = segmentInfos.getGeneration();
655 files = Collections.unmodifiableCollection(segmentInfos.files(directory, true));
656 segmentCount = segmentInfos.size();
660 public String toString() {
661 return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")";
665 public int getSegmentCount() {
670 public String getSegmentsFileName() {
671 return segmentsFileName;
675 public Collection<String> getFileNames() throws IOException {
680 public Directory getDirectory() {
685 public long getVersion() {
690 public long getGeneration() {
695 public Map<String,String> getUserData() {
700 * Called only be the deletion policy, to remove this
701 * commit point from the index.
704 public void delete() {
707 commitsToDelete.add(this);
712 public boolean isDeleted() {