1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.search.IndexSearcher;
21 import org.apache.lucene.search.TermQuery;
22 import org.apache.lucene.store.FSDirectory;
23 import org.apache.lucene.store.Directory;
24 import org.apache.lucene.store.IndexInput;
25 import org.apache.lucene.util.StringHelper;
26 import org.apache.lucene.document.AbstractField; // for javadocs
27 import org.apache.lucene.document.Document;
29 import java.text.NumberFormat;
30 import java.io.PrintStream;
31 import java.io.IOException;
33 import java.util.Collection;
35 import java.util.Comparator;
36 import java.util.List;
37 import java.util.ArrayList;
41 * Basic tool and API to check the health of an index and
42 * write a new segments file that removes reference to
43 * problematic segments.
45 * <p>As this tool checks every byte in the index, on a large
46 * index it can take quite a long time to run.
48 * @lucene.experimental Please make a complete backup of your
49 * index before using this to fix your index!
51 public class CheckIndex {
53 private PrintStream infoStream;
54 private Directory dir;
57 * Returned from {@link #checkIndex()} detailing the health and status of the index.
59 * @lucene.experimental
62 public static class Status {
64 /** True if no problems were found with the index. */
67 /** True if we were unable to locate and load the segments_N file. */
68 public boolean missingSegments;
70 /** True if we were unable to open the segments_N file. */
71 public boolean cantOpenSegments;
73 /** True if we were unable to read the version number from segments_N file. */
74 public boolean missingSegmentVersion;
76 /** Name of latest segments_N file in the index. */
77 public String segmentsFileName;
79 /** Number of segments in the index. */
80 public int numSegments;
82 /** String description of the version of the index. */
83 public String segmentFormat;
85 /** Empty unless you passed specific segments list to check as optional 3rd argument.
86 * @see CheckIndex#checkIndex(List) */
87 public List<String> segmentsChecked = new ArrayList<String>();
89 /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
90 public boolean toolOutOfDate;
92 /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
93 public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();
95 /** Directory index is in. */
99 * SegmentInfos instance containing only segments that
100 * had no problems (this is used with the {@link CheckIndex#fixIndex}
101 * method to repair the index).
103 SegmentInfos newSegments;
105 /** How many documents will be lost to bad segments. */
106 public int totLoseDocCount;
108 /** How many bad segments were found. */
109 public int numBadSegments;
111 /** True if we checked only specific segments ({@link
112 * #checkIndex(List)}) was called with non-null
114 public boolean partial;
116 /** The greatest segment name. */
117 public int maxSegmentName;
119 /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
120 public boolean validCounter;
122 /** Holds the userData of the last commit in the index */
123 public Map<String, String> userData;
125 /** Holds the status of each segment in the index.
126 * See {@link #segmentInfos}.
128 * <p><b>WARNING</b>: this API is new and experimental and is
129 * subject to suddenly change in the next release.
131 public static class SegmentInfoStatus {
132 /** Name of the segment. */
135 /** Document count (does not take deletions into account). */
138 /** True if segment is compound file format. */
139 public boolean compound;
141 /** Number of files referenced by this segment. */
144 /** Net size (MB) of the files referenced by this
146 public double sizeMB;
148 /** Doc store offset, if this segment shares the doc
149 * store files (stored fields and term vectors) with
150 * other segments. This is -1 if it does not share. */
151 public int docStoreOffset = -1;
153 /** String of the shared doc store segment, or null if
154 * this segment does not share the doc store files. */
155 public String docStoreSegment;
157 /** True if the shared doc store files are compound file
159 public boolean docStoreCompoundFile;
161 /** True if this segment has pending deletions. */
162 public boolean hasDeletions;
164 /** Name of the current deletions file name. */
165 public String deletionsFileName;
167 /** Number of deleted documents. */
168 public int numDeleted;
170 /** True if we were able to open a SegmentReader on this
172 public boolean openReaderPassed;
174 /** Number of fields in this segment. */
177 /** True if at least one of the fields in this segment
179 * @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
180 public boolean hasProx;
182 /** Map that includes certain
183 * debugging details that IndexWriter records into
184 * each segment it creates */
185 public Map<String,String> diagnostics;
187 /** Status for testing of field norms (null if field norms could not be tested). */
188 public FieldNormStatus fieldNormStatus;
190 /** Status for testing of indexed terms (null if indexed terms could not be tested). */
191 public TermIndexStatus termIndexStatus;
193 /** Status for testing of stored fields (null if stored fields could not be tested). */
194 public StoredFieldStatus storedFieldStatus;
196 /** Status for testing of term vectors (null if term vectors could not be tested). */
197 public TermVectorStatus termVectorStatus;
201 * Status from testing field norms.
203 public static final class FieldNormStatus {
204 /** Number of fields successfully tested */
205 public long totFields = 0L;
207 /** Exception thrown during term index test (null on success) */
208 public Throwable error = null;
212 * Status from testing term index.
214 public static final class TermIndexStatus {
215 /** Total term count */
216 public long termCount = 0L;
218 /** Total frequency across all terms. */
219 public long totFreq = 0L;
221 /** Total number of positions. */
222 public long totPos = 0L;
224 /** Exception thrown during term index test (null on success) */
225 public Throwable error = null;
229 * Status from testing stored fields.
231 public static final class StoredFieldStatus {
233 /** Number of documents tested. */
234 public int docCount = 0;
236 /** Total number of stored fields tested. */
237 public long totFields = 0;
239 /** Exception thrown during stored fields test (null on success) */
240 public Throwable error = null;
244 * Status from testing stored fields.
246 public static final class TermVectorStatus {
248 /** Number of documents tested. */
249 public int docCount = 0;
251 /** Total number of term vectors tested. */
252 public long totVectors = 0;
254 /** Exception thrown during term vector test (null on success) */
255 public Throwable error = null;
259 /** Create a new CheckIndex on the directory. */
260 public CheckIndex(Directory dir) {
265 /** Set infoStream where messages should go. If null, no
266 * messages are printed */
267 public void setInfoStream(PrintStream out) {
271 private void msg(String msg) {
272 if (infoStream != null)
273 infoStream.println(msg);
276 private static class MySegmentTermDocs extends SegmentTermDocs {
280 MySegmentTermDocs(SegmentReader p) {
285 public void seek(Term term) throws IOException {
291 protected void skippingDoc() throws IOException {
296 /** Returns a {@link Status} instance detailing
297 * the state of the index.
299 * <p>As this method checks every byte in the index, on a large
300 * index it can take quite a long time to run.
302 * <p><b>WARNING</b>: make sure
303 * you only call this when the index is not opened by any
305 public Status checkIndex() throws IOException {
306 return checkIndex(null);
309 /** Returns a {@link Status} instance detailing
310 * the state of the index.
312 * @param onlySegments list of specific segment names to check
314 * <p>As this method checks every byte in the specified
315 * segments, on a large index it can take quite a long
318 * <p><b>WARNING</b>: make sure
319 * you only call this when the index is not opened by any
321 public Status checkIndex(List<String> onlySegments) throws IOException {
322 NumberFormat nf = NumberFormat.getInstance();
323 SegmentInfos sis = new SegmentInfos();
324 Status result = new Status();
328 } catch (Throwable t) {
329 msg("ERROR: could not read any segments file in directory");
330 result.missingSegments = true;
331 if (infoStream != null)
332 t.printStackTrace(infoStream);
336 // find the oldest and newest segment versions
337 String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
338 String oldSegs = null;
339 boolean foundNonNullVersion = false;
340 Comparator<String> versionComparator = StringHelper.getVersionComparator();
341 for (SegmentInfo si : sis) {
342 String version = si.getVersion();
343 if (version == null) {
346 } else if (version.equals("2.x")) {
347 // an old segment that was 'touched' by 3.1+ code
350 foundNonNullVersion = true;
351 if (versionComparator.compare(version, oldest) < 0) {
354 if (versionComparator.compare(version, newest) > 0) {
360 final int numSegments = sis.size();
361 final String segmentsFileName = sis.getCurrentSegmentFileName();
362 IndexInput input = null;
364 input = dir.openInput(segmentsFileName);
365 } catch (Throwable t) {
366 msg("ERROR: could not open segments file in directory");
367 if (infoStream != null)
368 t.printStackTrace(infoStream);
369 result.cantOpenSegments = true;
374 format = input.readInt();
375 } catch (Throwable t) {
376 msg("ERROR: could not read segment file version in directory");
377 if (infoStream != null)
378 t.printStackTrace(infoStream);
379 result.missingSegmentVersion = true;
387 boolean skip = false;
389 if (format == SegmentInfos.FORMAT)
390 sFormat = "FORMAT [Lucene Pre-2.1]";
391 if (format == SegmentInfos.FORMAT_LOCKLESS)
392 sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
393 else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
394 sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
395 else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
396 sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
398 if (format == SegmentInfos.FORMAT_CHECKSUM)
399 sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
400 else if (format == SegmentInfos.FORMAT_DEL_COUNT)
401 sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
402 else if (format == SegmentInfos.FORMAT_HAS_PROX)
403 sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
404 else if (format == SegmentInfos.FORMAT_USER_DATA)
405 sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
406 else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
407 sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
408 else if (format == SegmentInfos.FORMAT_HAS_VECTORS)
409 sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
410 else if (format == SegmentInfos.FORMAT_3_1)
411 sFormat = "FORMAT_3_1 [Lucene 3.1+]";
412 else if (format == SegmentInfos.CURRENT_FORMAT)
413 throw new RuntimeException("BUG: You should update this tool!");
414 else if (format < SegmentInfos.CURRENT_FORMAT) {
415 sFormat = "int=" + format + " [newer version of Lucene than this tool]";
418 sFormat = format + " [Lucene 1.3 or prior]";
422 result.segmentsFileName = segmentsFileName;
423 result.numSegments = numSegments;
424 result.segmentFormat = sFormat;
425 result.userData = sis.getUserData();
426 String userDataString;
427 if (sis.getUserData().size() > 0) {
428 userDataString = " userData=" + sis.getUserData();
433 String versionString = null;
434 if (oldSegs != null) {
435 if (foundNonNullVersion) {
436 versionString = "versions=[" + oldSegs + " .. " + newest + "]";
438 versionString = "version=" + oldSegs;
441 versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
444 msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments
445 + " " + versionString + " format=" + sFormat + userDataString);
447 if (onlySegments != null) {
448 result.partial = true;
449 if (infoStream != null)
450 infoStream.print("\nChecking only these segments:");
451 for (String s : onlySegments) {
452 if (infoStream != null)
453 infoStream.print(" " + s);
455 result.segmentsChecked.addAll(onlySegments);
460 msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
461 result.toolOutOfDate = true;
466 result.newSegments = (SegmentInfos) sis.clone();
467 result.newSegments.clear();
468 result.maxSegmentName = -1;
470 for(int i=0;i<numSegments;i++) {
471 final SegmentInfo info = sis.info(i);
472 int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
473 if (segmentName > result.maxSegmentName) {
474 result.maxSegmentName = segmentName;
476 if (onlySegments != null && !onlySegments.contains(info.name))
478 Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
479 result.segmentInfos.add(segInfoStat);
480 msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
481 segInfoStat.name = info.name;
482 segInfoStat.docCount = info.docCount;
484 int toLoseDocCount = info.docCount;
486 SegmentReader reader = null;
489 msg(" compound=" + info.getUseCompoundFile());
490 segInfoStat.compound = info.getUseCompoundFile();
491 msg(" hasProx=" + info.getHasProx());
492 segInfoStat.hasProx = info.getHasProx();
493 msg(" numFiles=" + info.files().size());
494 segInfoStat.numFiles = info.files().size();
495 segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
496 msg(" size (MB)=" + nf.format(segInfoStat.sizeMB));
497 Map<String,String> diagnostics = info.getDiagnostics();
498 segInfoStat.diagnostics = diagnostics;
499 if (diagnostics.size() > 0) {
500 msg(" diagnostics = " + diagnostics);
503 final int docStoreOffset = info.getDocStoreOffset();
504 if (docStoreOffset != -1) {
505 msg(" docStoreOffset=" + docStoreOffset);
506 segInfoStat.docStoreOffset = docStoreOffset;
507 msg(" docStoreSegment=" + info.getDocStoreSegment());
508 segInfoStat.docStoreSegment = info.getDocStoreSegment();
509 msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
510 segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
512 final String delFileName = info.getDelFileName();
513 if (delFileName == null){
514 msg(" no deletions");
515 segInfoStat.hasDeletions = false;
518 msg(" has deletions [delFileName=" + delFileName + "]");
519 segInfoStat.hasDeletions = true;
520 segInfoStat.deletionsFileName = delFileName;
522 if (infoStream != null)
523 infoStream.print(" test: open reader.........");
524 reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
526 segInfoStat.openReaderPassed = true;
528 final int numDocs = reader.numDocs();
529 toLoseDocCount = numDocs;
530 if (reader.hasDeletions()) {
531 if (reader.deletedDocs.count() != info.getDelCount()) {
532 throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
534 if (reader.deletedDocs.count() > reader.maxDoc()) {
535 throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
537 if (info.docCount - numDocs != info.getDelCount()){
538 throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
540 segInfoStat.numDeleted = info.docCount - numDocs;
541 msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
543 if (info.getDelCount() != 0) {
544 throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
548 if (reader.maxDoc() != info.docCount)
549 throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
551 // Test getFieldNames()
552 if (infoStream != null) {
553 infoStream.print(" test: fields..............");
555 Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
556 msg("OK [" + fieldNames.size() + " fields]");
557 segInfoStat.numFields = fieldNames.size();
560 segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
562 // Test the Term Index
563 segInfoStat.termIndexStatus = testTermIndex(info, reader);
565 // Test Stored Fields
566 segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
569 segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
571 // Rethrow the first exception we encountered
572 // This will cause stats for failed segments to be incremented properly
573 if (segInfoStat.fieldNormStatus.error != null) {
574 throw new RuntimeException("Field Norm test failed");
575 } else if (segInfoStat.termIndexStatus.error != null) {
576 throw new RuntimeException("Term Index test failed");
577 } else if (segInfoStat.storedFieldStatus.error != null) {
578 throw new RuntimeException("Stored Field test failed");
579 } else if (segInfoStat.termVectorStatus.error != null) {
580 throw new RuntimeException("Term Vector test failed");
585 } catch (Throwable t) {
588 comment = "fixIndex() would remove reference to this segment";
589 msg(" WARNING: " + comment + "; full exception:");
590 if (infoStream != null)
591 t.printStackTrace(infoStream);
593 result.totLoseDocCount += toLoseDocCount;
594 result.numBadSegments++;
602 result.newSegments.add((SegmentInfo) info.clone());
605 if (0 == result.numBadSegments) {
608 msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
610 if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
611 result.clean = false;
612 result.newSegments.counter = result.maxSegmentName + 1;
613 msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
617 msg("No problems were detected with this index.\n");
626 private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
627 final Status.FieldNormStatus status = new Status.FieldNormStatus();
631 if (infoStream != null) {
632 infoStream.print(" test: field norms.........");
634 final byte[] b = new byte[reader.maxDoc()];
635 for (final String fieldName : fieldNames) {
636 if (reader.hasNorms(fieldName)) {
637 reader.norms(fieldName, b, 0);
642 msg("OK [" + status.totFields + " fields]");
643 } catch (Throwable e) {
644 msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
646 if (infoStream != null) {
647 e.printStackTrace(infoStream);
655 * Test the term index.
657 private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
658 final Status.TermIndexStatus status = new Status.TermIndexStatus();
660 final IndexSearcher is = new IndexSearcher(reader);
663 if (infoStream != null) {
664 infoStream.print(" test: terms, freq, prox...");
667 final TermEnum termEnum = reader.terms();
668 final TermPositions termPositions = reader.termPositions();
670 // Used only to count up # deleted docs for this term
671 final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
673 final int maxDoc = reader.maxDoc();
674 Term lastTerm = null;
675 while (termEnum.next()) {
677 final Term term = termEnum.term();
680 final int docFreq = termEnum.docFreq();
681 termPositions.seek(term);
684 status.totFreq += docFreq;
685 while (termPositions.next()) {
687 final int doc = termPositions.doc();
688 final int freq = termPositions.freq();
690 throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
692 throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
696 throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
699 status.totPos += freq;
700 for(int j=0;j<freq;j++) {
701 final int pos = termPositions.nextPosition();
703 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
705 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
711 for(int idx=0;idx<7;idx++) {
712 final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
713 termPositions.seek(term);
714 if (!termPositions.skipTo(skipDocID)) {
718 final int docID = termPositions.doc();
719 if (docID < skipDocID) {
720 throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + ") returned docID=" + docID);
722 final int freq = termPositions.freq();
724 throw new RuntimeException("termFreq " + freq + " is out of bounds");
726 int lastPosition = -1;
727 for(int posUpto=0;posUpto<freq;posUpto++) {
728 final int pos = termPositions.nextPosition();
730 throw new RuntimeException("position " + pos + " is out of bounds");
732 // TODO: we should assert when all pos == 0 that positions are actually omitted
733 if (pos < lastPosition) {
734 throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
739 if (!termPositions.next()) {
742 final int nextDocID = termPositions.doc();
743 if (nextDocID <= docID) {
744 throw new RuntimeException("term " + term + ": skipTo(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
749 // Now count how many deleted docs occurred in
752 if (reader.hasDeletions()) {
753 myTermDocs.seek(term);
754 while(myTermDocs.next()) { }
755 delCount = myTermDocs.delCount;
760 if (freq0 + delCount != docFreq) {
761 throw new RuntimeException("term " + term + " docFreq=" +
762 docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
766 // Test search on last term:
767 if (lastTerm != null) {
768 is.search(new TermQuery(lastTerm), 1);
771 msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
773 } catch (Throwable e) {
774 msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
776 if (infoStream != null) {
777 e.printStackTrace(infoStream);
785 * Test stored fields for a segment.
787 private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
788 final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
791 if (infoStream != null) {
792 infoStream.print(" test: stored fields.......");
795 // Scan stored fields for all documents
796 for (int j = 0; j < info.docCount; ++j) {
797 if (!reader.isDeleted(j)) {
799 Document doc = reader.document(j);
800 status.totFields += doc.getFields().size();
805 if (status.docCount != reader.numDocs()) {
806 throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
809 msg("OK [" + status.totFields + " total field count; avg " +
810 format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
811 } catch (Throwable e) {
812 msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
814 if (infoStream != null) {
815 e.printStackTrace(infoStream);
823 * Test term vectors for a segment.
825 private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
826 final Status.TermVectorStatus status = new Status.TermVectorStatus();
829 if (infoStream != null) {
830 infoStream.print(" test: term vectors........");
833 for (int j = 0; j < info.docCount; ++j) {
834 if (!reader.isDeleted(j)) {
836 TermFreqVector[] tfv = reader.getTermFreqVectors(j);
838 status.totVectors += tfv.length;
843 msg("OK [" + status.totVectors + " total vector count; avg " +
844 format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
845 } catch (Throwable e) {
846 msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
848 if (infoStream != null) {
849 e.printStackTrace(infoStream);
856 /** Repairs the index using previously returned result
857 * from {@link #checkIndex}. Note that this does not
858 * remove any of the unreferenced files after it's done;
859 * you must separately open an {@link IndexWriter}, which
860 * deletes unreferenced files when it's created.
862 * <p><b>WARNING</b>: this writes a
863 * new segments file into the index, effectively removing
864 * all documents in broken segments from the index.
867 * <p><b>WARNING</b>: Make sure you only call this when the
868 * index is not opened by any writer. */
869 public void fixIndex(Status result) throws IOException {
871 throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
872 result.newSegments.changed();
873 result.newSegments.commit(result.dir);
876 private static boolean assertsOn;
878 private static boolean testAsserts() {
883 private static boolean assertsOn() {
884 assert testAsserts();
888 /** Command-line interface to check and fix an index.
893 java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
896 <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
898 <li><code>-segment X</code>: only check the specified
899 segment(s). This can be specified multiple times,
900 to check more than one segment, eg <code>-segment _2
901 -segment _a</code>. You can't use this with the -fix
905 <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
906 documents (perhaps many) to be permanently removed from the index. Always make
907 a backup copy of your index before running this! Do not run this tool on an index
908 that is actively being written to. You have been warned!
910 <p> Run without -fix, this tool will open the index, report version information
911 and report any exceptions it hits and what action it would take if -fix were
912 specified. With -fix, this tool will remove any segments that have issues and
913 write a new segments_N file. This means all documents contained in the affected
914 segments will be removed.
917 This tool exits with exit code 1 if the index cannot be opened or has any
920 public static void main(String[] args) throws IOException, InterruptedException {
922 boolean doFix = false;
923 List<String> onlySegments = new ArrayList<String>();
924 String indexPath = null;
926 while(i < args.length) {
927 if (args[i].equals("-fix")) {
930 } else if (args[i].equals("-segment")) {
931 if (i == args.length-1) {
932 System.out.println("ERROR: missing name for -segment option");
935 onlySegments.add(args[i+1]);
938 if (indexPath != null) {
939 System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
947 if (indexPath == null) {
948 System.out.println("\nERROR: index path not specified");
949 System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
951 " -fix: actually write a new segments_N file, removing any problematic segments\n" +
952 " -segment X: only check the specified segments. This can be specified multiple\n" +
953 " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
954 " You can't use this with the -fix option\n" +
956 "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
957 "documents (perhaps many) to be permanently removed from the index. Always make\n" +
958 "a backup copy of your index before running this! Do not run this tool on an index\n" +
959 "that is actively being written to. You have been warned!\n" +
961 "Run without -fix, this tool will open the index, report version information\n" +
962 "and report any exceptions it hits and what action it would take if -fix were\n" +
963 "specified. With -fix, this tool will remove any segments that have issues and\n" +
964 "write a new segments_N file. This means all documents contained in the affected\n" +
965 "segments will be removed.\n" +
967 "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
968 "corruption, else 0.\n");
973 System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
975 if (onlySegments.size() == 0)
978 System.out.println("ERROR: cannot specify both -fix and -segment");
982 System.out.println("\nOpening index @ " + indexPath + "\n");
983 Directory dir = null;
985 dir = FSDirectory.open(new File(indexPath));
986 } catch (Throwable t) {
987 System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
988 t.printStackTrace(System.out);
992 CheckIndex checker = new CheckIndex(dir);
993 checker.setInfoStream(System.out);
995 Status result = checker.checkIndex(onlySegments);
996 if (result.missingSegments) {
1000 if (!result.clean) {
1002 System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
1004 System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
1005 System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
1006 for(int s=0;s<5;s++) {
1008 System.out.println(" " + (5-s) + "...");
1010 System.out.println("Writing...");
1011 checker.fixIndex(result);
1012 System.out.println("OK");
1013 System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
1016 System.out.println("");
1019 if (result.clean == true)
1023 System.exit(exitCode);