X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java new file mode 100644 index 0000000..64c4431 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -0,0 +1,1037 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.document.AbstractField; // for javadocs +import org.apache.lucene.document.Document; + +import java.text.NumberFormat; +import java.io.PrintStream; +import java.io.IOException; +import java.io.File; +import java.util.Collection; + +import java.util.Comparator; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; + +/** + * Basic tool and API to check the health of an index and + * write a new segments file that removes reference to + * problematic segments. + * + *

As this tool checks every byte in the index, on a large + * index it can take quite a long time to run. + * + * @lucene.experimental Please make a complete backup of your + * index before using this to fix your index! + */ +public class CheckIndex { + + private PrintStream infoStream; + private Directory dir; + + /** + * Returned from {@link #checkIndex()} detailing the health and status of the index. + * + * @lucene.experimental + **/ + + public static class Status { + + /** True if no problems were found with the index. */ + public boolean clean; + + /** True if we were unable to locate and load the segments_N file. */ + public boolean missingSegments; + + /** True if we were unable to open the segments_N file. */ + public boolean cantOpenSegments; + + /** True if we were unable to read the version number from segments_N file. */ + public boolean missingSegmentVersion; + + /** Name of latest segments_N file in the index. */ + public String segmentsFileName; + + /** Number of segments in the index. */ + public int numSegments; + + /** String description of the version of the index. */ + public String segmentFormat; + + /** Empty unless you passed specific segments list to check as optional 3rd argument. + * @see CheckIndex#checkIndex(List) */ + public List segmentsChecked = new ArrayList(); + + /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */ + public boolean toolOutOfDate; + + /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */ + public List segmentInfos = new ArrayList(); + + /** Directory index is in. */ + public Directory dir; + + /** + * SegmentInfos instance containing only segments that + * had no problems (this is used with the {@link CheckIndex#fixIndex} + * method to repair the index. + */ + SegmentInfos newSegments; + + /** How many documents will be lost to bad segments. */ + public int totLoseDocCount; + + /** How many bad segments were found. 
*/ + public int numBadSegments; + + /** True if only specific segments were checked, i.e. + * {@link #checkIndex(List)} was called with a non-null + * argument. {@link CheckIndex#fixIndex} rejects a partial status. */ + public boolean partial; + + /** The greatest segment name. */ + public int maxSegmentName; + + /** Whether the SegmentInfos.counter is greater than any of the segments' names. */ + public boolean validCounter; + + /** Holds the userData of the last commit in the index. */ + public Map userData; + + /** Holds the status of each segment in the index. + * See {@link #segmentInfos}. + * + *

WARNING: this API is new and experimental and is + * subject to suddenly change in the next release. + */ + public static class SegmentInfoStatus { + /** Name of the segment. */ + public String name; + + /** Document count (does not take deletions into account). */ + public int docCount; + + /** True if segment is compound file format. */ + public boolean compound; + + /** Number of files referenced by this segment. */ + public int numFiles; + + /** Net size (MB) of the files referenced by this + * segment. */ + public double sizeMB; + + /** Doc store offset, if this segment shares the doc + * store files (stored fields and term vectors) with + * other segments. This is -1 if it does not share. */ + public int docStoreOffset = -1; + + /** String of the shared doc store segment, or null if + * this segment does not share the doc store files. */ + public String docStoreSegment; + + /** True if the shared doc store files are compound file + * format. */ + public boolean docStoreCompoundFile; + + /** True if this segment has pending deletions. */ + public boolean hasDeletions; + + /** Name of the current deletions file name. */ + public String deletionsFileName; + + /** Number of deleted documents. */ + public int numDeleted; + + /** True if we were able to open a SegmentReader on this + * segment. */ + public boolean openReaderPassed; + + /** Number of fields in this segment. */ + int numFields; + + /** True if at least one of the fields in this segment + * has position data + * @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */ + public boolean hasProx; + + /** Map that includes certain + * debugging details that IndexWriter records into + * each segment it creates */ + public Map diagnostics; + + /** Status for testing of field norms (null if field norms could not be tested). */ + public FieldNormStatus fieldNormStatus; + + /** Status for testing of indexed terms (null if indexed terms could not be tested). 
*/ + public TermIndexStatus termIndexStatus; + + /** Status for testing of stored fields (null if stored fields could not be tested). */ + public StoredFieldStatus storedFieldStatus; + + /** Status for testing of term vectors (null if term vectors could not be tested). */ + public TermVectorStatus termVectorStatus; + } + + /** + * Status from testing field norms. + */ + public static final class FieldNormStatus { + /** Number of fields successfully tested. */ + public long totFields = 0L; + + /** Exception thrown during field norm test (null on success). */ + public Throwable error = null; + } + + /** + * Status from testing term index. + */ + public static final class TermIndexStatus { + /** Total term count. */ + public long termCount = 0L; + + /** Total frequency across all terms (sum of each term's docFreq). */ + public long totFreq = 0L; + + /** Total number of positions (sum of the per-document term frequencies). */ + public long totPos = 0L; + + /** Exception thrown during term index test (null on success). */ + public Throwable error = null; + } + + /** + * Status from testing stored fields. + */ + public static final class StoredFieldStatus { + + /** Number of documents tested. */ + public int docCount = 0; + + /** Total number of stored fields tested. */ + public long totFields = 0; + + /** Exception thrown during stored fields test (null on success). */ + public Throwable error = null; + } + + /** + * Status from testing term vectors. + */ + public static final class TermVectorStatus { + + /** Number of documents tested. */ + public int docCount = 0; + + /** Total number of term vectors tested. */ + public long totVectors = 0; + + /** Exception thrown during term vector test (null on success). */ + public Throwable error = null; + } + } + + /** Create a new CheckIndex on the directory. The info stream starts out + * null, so no messages are printed until one is set. */ + public CheckIndex(Directory dir) { + this.dir = dir; + infoStream = null; + } + + /** Set infoStream where messages should go.
If null, no + * messages are printed */ + public void setInfoStream(PrintStream out) { + infoStream = out; + } + + /** Prints one line to infoStream; does nothing when no infoStream is set. */ + private void msg(String msg) { + if (infoStream != null) + infoStream.println(msg); + } + + /** SegmentTermDocs subclass that counts skippingDoc() callbacks in + * delCount; the term index test uses it only to count the deleted docs of a term. + * NOTE(review): presumably SegmentTermDocs calls skippingDoc() once per + * deleted document it skips -- confirm against SegmentTermDocs. */ + private static class MySegmentTermDocs extends SegmentTermDocs { + + /** Number of skippingDoc() calls since the last seek(Term). */ + int delCount; + + MySegmentTermDocs(SegmentReader p) { + super(p); + } + + /** Seeks to the term and resets delCount to 0. */ + @Override + public void seek(Term term) throws IOException { + super.seek(term); + delCount = 0; + } + + /** Increments delCount. */ + @Override + protected void skippingDoc() throws IOException { + delCount++; + } + } + + /** Returns a {@link Status} instance detailing + * the state of the index. + * + *

As this method checks every byte in the index, on a large + * index it can take quite a long time to run. + * + *

WARNING: make sure + * you only call this when the index is not opened by any + * writer. */ + public Status checkIndex() throws IOException { + return checkIndex(null); + } + + /** Returns a {@link Status} instance detailing + * the state of the index. + * + * @param onlySegments list of specific segment names to check + * + *

As this method checks every byte in the specified + * segments, on a large index it can take quite a long + * time to run. + * + *

WARNING: make sure + * you only call this when the index is not opened by any + * writer. */ + public Status checkIndex(List onlySegments) throws IOException { + NumberFormat nf = NumberFormat.getInstance(); + SegmentInfos sis = new SegmentInfos(); + Status result = new Status(); + result.dir = dir; + try { + sis.read(dir); + } catch (Throwable t) { + msg("ERROR: could not read any segments file in directory"); + result.missingSegments = true; + if (infoStream != null) + t.printStackTrace(infoStream); + return result; + } + + // find the oldest and newest segment versions + String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE); + String oldSegs = null; + boolean foundNonNullVersion = false; + Comparator versionComparator = StringHelper.getVersionComparator(); + for (SegmentInfo si : sis) { + String version = si.getVersion(); + if (version == null) { + // pre-3.1 segment + oldSegs = "pre-3.1"; + } else if (version.equals("2.x")) { + // an old segment that was 'touched' by 3.1+ code + oldSegs = "2.x"; + } else { + foundNonNullVersion = true; + if (versionComparator.compare(version, oldest) < 0) { + oldest = version; + } + if (versionComparator.compare(version, newest) > 0) { + newest = version; + } + } + } + + final int numSegments = sis.size(); + final String segmentsFileName = sis.getCurrentSegmentFileName(); + IndexInput input = null; + try { + input = dir.openInput(segmentsFileName); + } catch (Throwable t) { + msg("ERROR: could not open segments file in directory"); + if (infoStream != null) + t.printStackTrace(infoStream); + result.cantOpenSegments = true; + return result; + } + int format = 0; + try { + format = input.readInt(); + } catch (Throwable t) { + msg("ERROR: could not read segment file version in directory"); + if (infoStream != null) + t.printStackTrace(infoStream); + result.missingSegmentVersion = true; + return result; + } finally { + if (input != null) + input.close(); + } + + String sFormat = ""; + 
boolean skip = false; + + // Map the format int read from the segments file to a description. + // This must be ONE if/else-if chain so that exactly one branch assigns sFormat: + // without the "else" after the FORMAT test, a FORMAT match fell through to the + // trailing else-chain and its description was overwritten. + if (format == SegmentInfos.FORMAT) + sFormat = "FORMAT [Lucene Pre-2.1]"; + else if (format == SegmentInfos.FORMAT_LOCKLESS) + sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; + else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) + sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; + else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) + sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; + else { + if (format == SegmentInfos.FORMAT_CHECKSUM) + sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_DEL_COUNT) + sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_HAS_PROX) + sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_USER_DATA) + sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; + else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) + sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; + else if (format == SegmentInfos.FORMAT_HAS_VECTORS) + sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]"; + else if (format == SegmentInfos.FORMAT_3_1) + sFormat = "FORMAT_3_1 [Lucene 3.1+]"; + else if (format == SegmentInfos.CURRENT_FORMAT) + throw new RuntimeException("BUG: You should update this tool!"); + else if (format < SegmentInfos.CURRENT_FORMAT) { + sFormat = "int=" + format + " [newer version of Lucene than this tool]"; + skip = true; + } else { + sFormat = format + " [Lucene 1.3 or prior]"; + } + } + + result.segmentsFileName = segmentsFileName; + result.numSegments = numSegments; + result.segmentFormat = sFormat; + result.userData = sis.getUserData(); + String userDataString; + if (sis.getUserData().size() > 0) { + userDataString = " userData=" + sis.getUserData(); + } else { + userDataString = ""; + } + + String versionString = null; + if (oldSegs != null) { + if (foundNonNullVersion) { + versionString = "versions=[" + oldSegs + " .. " + newest + "]"; + } else { + versionString = "version=" + oldSegs; + } + } else { + versionString = oldest.equals(newest) ?
( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]"); + } + + msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + + " " + versionString + " format=" + sFormat + userDataString); + + if (onlySegments != null) { + result.partial = true; + if (infoStream != null) + infoStream.print("\nChecking only these segments:"); + for (String s : onlySegments) { + if (infoStream != null) + infoStream.print(" " + s); + } + result.segmentsChecked.addAll(onlySegments); + msg(":"); + } + + if (skip) { + msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); + result.toolOutOfDate = true; + return result; + } + + + result.newSegments = (SegmentInfos) sis.clone(); + result.newSegments.clear(); + result.maxSegmentName = -1; + + for(int i=0;i result.maxSegmentName) { + result.maxSegmentName = segmentName; + } + if (onlySegments != null && !onlySegments.contains(info.name)) + continue; + Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); + result.segmentInfos.add(segInfoStat); + msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); + segInfoStat.name = info.name; + segInfoStat.docCount = info.docCount; + + int toLoseDocCount = info.docCount; + + SegmentReader reader = null; + + try { + msg(" compound=" + info.getUseCompoundFile()); + segInfoStat.compound = info.getUseCompoundFile(); + msg(" hasProx=" + info.getHasProx()); + segInfoStat.hasProx = info.getHasProx(); + msg(" numFiles=" + info.files().size()); + segInfoStat.numFiles = info.files().size(); + segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.); + msg(" size (MB)=" + nf.format(segInfoStat.sizeMB)); + Map diagnostics = info.getDiagnostics(); + segInfoStat.diagnostics = diagnostics; + if (diagnostics.size() > 0) { + msg(" diagnostics = " + diagnostics); + } + + final int docStoreOffset = 
info.getDocStoreOffset(); + if (docStoreOffset != -1) { + msg(" docStoreOffset=" + docStoreOffset); + segInfoStat.docStoreOffset = docStoreOffset; + msg(" docStoreSegment=" + info.getDocStoreSegment()); + segInfoStat.docStoreSegment = info.getDocStoreSegment(); + msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile()); + segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); + } + final String delFileName = info.getDelFileName(); + if (delFileName == null){ + msg(" no deletions"); + segInfoStat.hasDeletions = false; + } + else{ + msg(" has deletions [delFileName=" + delFileName + "]"); + segInfoStat.hasDeletions = true; + segInfoStat.deletionsFileName = delFileName; + } + if (infoStream != null) + infoStream.print(" test: open reader........."); + reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + + segInfoStat.openReaderPassed = true; + + final int numDocs = reader.numDocs(); + toLoseDocCount = numDocs; + if (reader.hasDeletions()) { + if (reader.deletedDocs.count() != info.getDelCount()) { + throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count()); + } + if (reader.deletedDocs.count() > reader.maxDoc()) { + throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count()); + } + if (info.docCount - numDocs != info.getDelCount()){ + throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); + } + segInfoStat.numDeleted = info.docCount - numDocs; + msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); + } else { + if (info.getDelCount() != 0) { + throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); + } + msg("OK"); + } + if (reader.maxDoc() != info.docCount) + throw new RuntimeException("SegmentReader.maxDoc() " + 
reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount); + + // Test getFieldNames() + if (infoStream != null) { + infoStream.print(" test: fields.............."); + } + Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); + msg("OK [" + fieldNames.size() + " fields]"); + segInfoStat.numFields = fieldNames.size(); + + // Test Field Norms + segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); + + // Test the Term Index + segInfoStat.termIndexStatus = testTermIndex(info, reader); + + // Test Stored Fields + segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); + + // Test Term Vectors + segInfoStat.termVectorStatus = testTermVectors(info, reader, nf); + + // Rethrow the first exception we encountered + // This will cause stats for failed segments to be incremented properly + if (segInfoStat.fieldNormStatus.error != null) { + throw new RuntimeException("Field Norm test failed"); + } else if (segInfoStat.termIndexStatus.error != null) { + throw new RuntimeException("Term Index test failed"); + } else if (segInfoStat.storedFieldStatus.error != null) { + throw new RuntimeException("Stored Field test failed"); + } else if (segInfoStat.termVectorStatus.error != null) { + throw new RuntimeException("Term Vector test failed"); + } + + msg(""); + + } catch (Throwable t) { + msg("FAILED"); + String comment; + comment = "fixIndex() would remove reference to this segment"; + msg(" WARNING: " + comment + "; full exception:"); + if (infoStream != null) + t.printStackTrace(infoStream); + msg(""); + result.totLoseDocCount += toLoseDocCount; + result.numBadSegments++; + continue; + } finally { + if (reader != null) + reader.close(); + } + + // Keeper + result.newSegments.add((SegmentInfo) info.clone()); + } + + if (0 == result.numBadSegments) { + result.clean = true; + } else + msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); + + if ( ! 
(result.validCounter = (result.maxSegmentName < sis.counter))) { + result.clean = false; + result.newSegments.counter = result.maxSegmentName + 1; + msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName); + } + + if (result.clean) { + msg("No problems were detected with this index.\n"); + } + + return result; + } + + /** + * Test field norms. + */ + private Status.FieldNormStatus testFieldNorms(Collection fieldNames, SegmentReader reader) { + final Status.FieldNormStatus status = new Status.FieldNormStatus(); + + try { + // Test Field Norms + if (infoStream != null) { + infoStream.print(" test: field norms........."); + } + final byte[] b = new byte[reader.maxDoc()]; + for (final String fieldName : fieldNames) { + if (reader.hasNorms(fieldName)) { + reader.norms(fieldName, b, 0); + ++status.totFields; + } + } + + msg("OK [" + status.totFields + " fields]"); + } catch (Throwable e) { + msg("ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + + return status; + } + + /** + * Test the term index. 
+ */ + private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + final Status.TermIndexStatus status = new Status.TermIndexStatus(); + + final IndexSearcher is = new IndexSearcher(reader); + + try { + if (infoStream != null) { + infoStream.print(" test: terms, freq, prox..."); + } + + final TermEnum termEnum = reader.terms(); + final TermPositions termPositions = reader.termPositions(); + + // Used only to count up # deleted docs for this term + final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); + + final int maxDoc = reader.maxDoc(); + Term lastTerm = null; + while (termEnum.next()) { + status.termCount++; + final Term term = termEnum.term(); + lastTerm = term; + + final int docFreq = termEnum.docFreq(); + if (docFreq <= 0) { + throw new RuntimeException("docfreq: " + docFreq + " is out of bounds"); + } + termPositions.seek(term); + int lastDoc = -1; + int freq0 = 0; + status.totFreq += docFreq; + while (termPositions.next()) { + freq0++; + final int doc = termPositions.doc(); + final int freq = termPositions.freq(); + if (doc <= lastDoc) + throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); + if (doc >= maxDoc) + throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); + + lastDoc = doc; + if (freq <= 0) + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + + int lastPos = -1; + status.totPos += freq; + for(int j=0;jWARNING: this writes a + * new segments file into the index, effectively removing + * all documents in broken segments from the index. + * BE CAREFUL. + * + *

WARNING: Make sure you only call this when the + * index is not opened by any writer. */ + public void fixIndex(Status result) throws IOException { + if (result.partial) + throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); + result.newSegments.changed(); + result.newSegments.commit(result.dir); + } + + private static boolean assertsOn; + + private static boolean testAsserts() { + assertsOn = true; + return true; + } + + private static boolean assertsOn() { + assert testAsserts(); + return assertsOn; + } + + /** Command-line interface to check and fix an index. + +

+ Run it like this: +

+    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+    
+ + +

WARNING: -fix should only be used on an emergency basis as it will cause + documents (perhaps many) to be permanently removed from the index. Always make + a backup copy of your index before running this! Do not run this tool on an index + that is actively being written to. You have been warned! + +

Run without -fix, this tool will open the index, report version information + and report any exceptions it hits and what action it would take if -fix were + specified. With -fix, this tool will remove any segments that have issues and + write a new segments_N file. This means all documents contained in the affected + segments will be removed. + +

+ This tool exits with exit code 1 if the index cannot be opened or has any + corruption, else 0. + */ + public static void main(String[] args) throws IOException, InterruptedException { + + boolean doFix = false; + List onlySegments = new ArrayList(); + String indexPath = null; + int i = 0; + while(i < args.length) { + if (args[i].equals("-fix")) { + doFix = true; + i++; + } else if (args[i].equals("-segment")) { + if (i == args.length-1) { + System.out.println("ERROR: missing name for -segment option"); + System.exit(1); + } + onlySegments.add(args[i+1]); + i += 2; + } else { + if (indexPath != null) { + System.out.println("ERROR: unexpected extra argument '" + args[i] + "'"); + System.exit(1); + } + indexPath = args[i]; + i++; + } + } + + if (indexPath == null) { + System.out.println("\nERROR: index path not specified"); + System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + + "\n" + + " -fix: actually write a new segments_N file, removing any problematic segments\n" + + " -segment X: only check the specified segments. This can be specified multiple\n" + + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + + " You can't use this with the -fix option\n" + + "\n" + + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + + "documents (perhaps many) to be permanently removed from the index. Always make\n" + + "a backup copy of your index before running this! Do not run this tool on an index\n" + + "that is actively being written to. You have been warned!\n" + + "\n" + + "Run without -fix, this tool will open the index, report version information\n" + + "and report any exceptions it hits and what action it would take if -fix were\n" + + "specified. With -fix, this tool will remove any segments that have issues and\n" + + "write a new segments_N file. 
This means all documents contained in the affected\n" + + "segments will be removed.\n" + + "\n" + + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + + "corruption, else 0.\n"); + System.exit(1); + } + + if (!assertsOn()) + System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled"); + + if (onlySegments.size() == 0) + onlySegments = null; + else if (doFix) { + System.out.println("ERROR: cannot specify both -fix and -segment"); + System.exit(1); + } + + System.out.println("\nOpening index @ " + indexPath + "\n"); + Directory dir = null; + try { + dir = FSDirectory.open(new File(indexPath)); + } catch (Throwable t) { + System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting"); + t.printStackTrace(System.out); + System.exit(1); + } + + CheckIndex checker = new CheckIndex(dir); + checker.setInfoStream(System.out); + + Status result = checker.checkIndex(onlySegments); + if (result.missingSegments) { + System.exit(1); + } + + if (!result.clean) { + if (!doFix) { + System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); + } else { + System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); + System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); + for(int s=0;s<5;s++) { + Thread.sleep(1000); + System.out.println(" " + (5-s) + "..."); + } + System.out.println("Writing..."); + checker.fixIndex(result); + System.out.println("OK"); + System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\""); + } + } + System.out.println(""); + + final int exitCode; + if (result.clean == true) + exitCode = 0; + else + exitCode = 1; + System.exit(exitCode); + } +}