X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java?ds=sidebyside
diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java
new file mode 100644
index 0000000..64c4431
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/CheckIndex.java
@@ -0,0 +1,1037 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.document.AbstractField; // for javadocs
+import org.apache.lucene.document.Document;
+
+import java.text.NumberFormat;
+import java.io.PrintStream;
+import java.io.IOException;
+import java.io.File;
+import java.util.Collection;
+
+import java.util.Comparator;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+
+/**
+ * Basic tool and API to check the health of an index and
+ * write a new segments file that removes reference to
+ * problematic segments.
+ *
+ * <p>As this tool checks every byte in the index, on a large
+ * index it can take quite a long time to run.
+ *
+ * @lucene.experimental Please make a complete backup of your
+ * index before using this to fix your index!
+ */
+public class CheckIndex {
+
+ private PrintStream infoStream;
+ private Directory dir;
+
+ /**
+ * Returned from {@link #checkIndex()} detailing the health and status of the index.
+ *
+ * @lucene.experimental
+ **/
+
+ public static class Status {
+
+ /** True if no problems were found with the index. */
+ public boolean clean;
+
+ /** True if we were unable to locate and load the segments_N file. */
+ public boolean missingSegments;
+
+ /** True if we were unable to open the segments_N file. */
+ public boolean cantOpenSegments;
+
+ /** True if we were unable to read the version number from segments_N file. */
+ public boolean missingSegmentVersion;
+
+ /** Name of latest segments_N file in the index. */
+ public String segmentsFileName;
+
+ /** Number of segments in the index. */
+ public int numSegments;
+
+ /** String description of the version of the index. */
+ public String segmentFormat;
+
+ /** Empty unless you passed specific segments list to check as optional 3rd argument.
+ * @see CheckIndex#checkIndex(List) */
+ public List<String> segmentsChecked = new ArrayList<String>();
+
+ /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
+ public boolean toolOutOfDate;
+
+ /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
+ public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();
+
+ /** Directory index is in. */
+ public Directory dir;
+
+ /**
+ * SegmentInfos instance containing only segments that
+ * had no problems (this is used with the {@link CheckIndex#fixIndex}
+ * method to repair the index.
+ */
+ SegmentInfos newSegments;
+
+ /** How many documents will be lost to bad segments. */
+ public int totLoseDocCount;
+
+ /** How many bad segments were found. */
+ public int numBadSegments;
+
+ /** True if we checked only specific segments ({@link
+ * #checkIndex(List)}) was called with non-null
+ * argument). */
+ public boolean partial;
+
+ /** The greatest segment name. */
+ public int maxSegmentName;
+
+ /** Whether the SegmentInfos.counter is greater than any of the segments' names. */
+ public boolean validCounter;
+
+ /** Holds the userData of the last commit in the index */
+ public Map<String, String> userData;
+
+ /** Holds the status of each segment in the index.
+ * See {@link #segmentInfos}.
+ *
+ * WARNING: this API is new and experimental and is
+ * subject to suddenly change in the next release.
+ */
+ public static class SegmentInfoStatus {
+ /** Name of the segment. */
+ public String name;
+
+ /** Document count (does not take deletions into account). */
+ public int docCount;
+
+ /** True if segment is compound file format. */
+ public boolean compound;
+
+ /** Number of files referenced by this segment. */
+ public int numFiles;
+
+ /** Net size (MB) of the files referenced by this
+ * segment. */
+ public double sizeMB;
+
+ /** Doc store offset, if this segment shares the doc
+ * store files (stored fields and term vectors) with
+ * other segments. This is -1 if it does not share. */
+ public int docStoreOffset = -1;
+
+ /** String of the shared doc store segment, or null if
+ * this segment does not share the doc store files. */
+ public String docStoreSegment;
+
+ /** True if the shared doc store files are compound file
+ * format. */
+ public boolean docStoreCompoundFile;
+
+ /** True if this segment has pending deletions. */
+ public boolean hasDeletions;
+
+ /** Name of the current deletions file name. */
+ public String deletionsFileName;
+
+ /** Number of deleted documents. */
+ public int numDeleted;
+
+ /** True if we were able to open a SegmentReader on this
+ * segment. */
+ public boolean openReaderPassed;
+
+ /** Number of fields in this segment. */
+ int numFields;
+
+ /** True if at least one of the fields in this segment
+ * has position data
+ * @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
+ public boolean hasProx;
+
+ /** Map that includes certain
+ * debugging details that IndexWriter records into
+ * each segment it creates */
+ public Map<String,String> diagnostics;
+
+ /** Status for testing of field norms (null if field norms could not be tested). */
+ public FieldNormStatus fieldNormStatus;
+
+ /** Status for testing of indexed terms (null if indexed terms could not be tested). */
+ public TermIndexStatus termIndexStatus;
+
+ /** Status for testing of stored fields (null if stored fields could not be tested). */
+ public StoredFieldStatus storedFieldStatus;
+
+ /** Status for testing of term vectors (null if term vectors could not be tested). */
+ public TermVectorStatus termVectorStatus;
+ }
+
+ /**
+ * Status from testing field norms.
+ */
+ public static final class FieldNormStatus {
+ /** Number of fields successfully tested */
+ public long totFields = 0L;
+
+ /** Exception thrown during term index test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing term index.
+ */
+ public static final class TermIndexStatus {
+ /** Total term count */
+ public long termCount = 0L;
+
+ /** Total frequency across all terms. */
+ public long totFreq = 0L;
+
+ /** Total number of positions. */
+ public long totPos = 0L;
+
+ /** Exception thrown during term index test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing stored fields.
+ */
+ public static final class StoredFieldStatus {
+
+ /** Number of documents tested. */
+ public int docCount = 0;
+
+ /** Total number of stored fields tested. */
+ public long totFields = 0;
+
+ /** Exception thrown during stored fields test (null on success) */
+ public Throwable error = null;
+ }
+
+ /**
+ * Status from testing stored fields.
+ */
+ public static final class TermVectorStatus {
+
+ /** Number of documents tested. */
+ public int docCount = 0;
+
+ /** Total number of term vectors tested. */
+ public long totVectors = 0;
+
+ /** Exception thrown during term vector test (null on success) */
+ public Throwable error = null;
+ }
+ }
+
+ /** Create a new CheckIndex on the directory. */
+ public CheckIndex(Directory dir) {
+ this.dir = dir;
+ infoStream = null;
+ }
+
+ /** Set infoStream where messages should go. If null, no
+ * messages are printed */
+ public void setInfoStream(PrintStream out) {
+ infoStream = out;
+ }
+
+ private void msg(String msg) {
+ if (infoStream != null)
+ infoStream.println(msg);
+ }
+
+ private static class MySegmentTermDocs extends SegmentTermDocs {
+
+ int delCount;
+
+ MySegmentTermDocs(SegmentReader p) {
+ super(p);
+ }
+
+ @Override
+ public void seek(Term term) throws IOException {
+ super.seek(term);
+ delCount = 0;
+ }
+
+ @Override
+ protected void skippingDoc() throws IOException {
+ delCount++;
+ }
+ }
+
+ /** Returns a {@link Status} instance detailing
+ * the state of the index.
+ *
+ * As this method checks every byte in the index, on a large
+ * index it can take quite a long time to run.
+ *
+ * <p><b>WARNING</b>: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status checkIndex() throws IOException {
+ return checkIndex(null);
+ }
+
+ /** Returns a {@link Status} instance detailing
+ * the state of the index.
+ *
+ * @param onlySegments list of specific segment names to check
+ *
+ * <p>As this method checks every byte in the specified
+ * segments, on a large index it can take quite a long
+ * time to run.
+ *
+ * <p><b>WARNING</b>: make sure
+ * you only call this when the index is not opened by any
+ * writer. */
+ public Status checkIndex(List<String> onlySegments) throws IOException {
+ NumberFormat nf = NumberFormat.getInstance();
+ SegmentInfos sis = new SegmentInfos();
+ Status result = new Status();
+ result.dir = dir;
+ try {
+ sis.read(dir);
+ } catch (Throwable t) {
+ msg("ERROR: could not read any segments file in directory");
+ result.missingSegments = true;
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
+ return result;
+ }
+
+ // find the oldest and newest segment versions
+ String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
+ String oldSegs = null;
+ boolean foundNonNullVersion = false;
+ Comparator<String> versionComparator = StringHelper.getVersionComparator();
+ for (SegmentInfo si : sis) {
+ String version = si.getVersion();
+ if (version == null) {
+ // pre-3.1 segment
+ oldSegs = "pre-3.1";
+ } else if (version.equals("2.x")) {
+ // an old segment that was 'touched' by 3.1+ code
+ oldSegs = "2.x";
+ } else {
+ foundNonNullVersion = true;
+ if (versionComparator.compare(version, oldest) < 0) {
+ oldest = version;
+ }
+ if (versionComparator.compare(version, newest) > 0) {
+ newest = version;
+ }
+ }
+ }
+
+ final int numSegments = sis.size();
+ final String segmentsFileName = sis.getCurrentSegmentFileName();
+ IndexInput input = null;
+ try {
+ input = dir.openInput(segmentsFileName);
+ } catch (Throwable t) {
+ msg("ERROR: could not open segments file in directory");
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
+ result.cantOpenSegments = true;
+ return result;
+ }
+ int format = 0;
+ try {
+ format = input.readInt();
+ } catch (Throwable t) {
+ msg("ERROR: could not read segment file version in directory");
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
+ result.missingSegmentVersion = true;
+ return result;
+ } finally {
+ if (input != null)
+ input.close();
+ }
+
+ String sFormat = "";
+ boolean skip = false;
+
+ if (format == SegmentInfos.FORMAT)
+ sFormat = "FORMAT [Lucene Pre-2.1]";
+ if (format == SegmentInfos.FORMAT_LOCKLESS)
+ sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
+ else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+ sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
+ else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
+ else {
+ if (format == SegmentInfos.FORMAT_CHECKSUM)
+ sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_DEL_COUNT)
+ sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_HAS_PROX)
+ sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_USER_DATA)
+ sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+ sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_HAS_VECTORS)
+ sFormat = "FORMAT_HAS_VECTORS [Lucene 3.1]";
+ else if (format == SegmentInfos.FORMAT_3_1)
+ sFormat = "FORMAT_3_1 [Lucene 3.1+]";
+ else if (format == SegmentInfos.CURRENT_FORMAT)
+ throw new RuntimeException("BUG: You should update this tool!");
+ else if (format < SegmentInfos.CURRENT_FORMAT) {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ skip = true;
+ } else {
+ sFormat = format + " [Lucene 1.3 or prior]";
+ }
+ }
+
+ result.segmentsFileName = segmentsFileName;
+ result.numSegments = numSegments;
+ result.segmentFormat = sFormat;
+ result.userData = sis.getUserData();
+ String userDataString;
+ if (sis.getUserData().size() > 0) {
+ userDataString = " userData=" + sis.getUserData();
+ } else {
+ userDataString = "";
+ }
+
+ String versionString = null;
+ if (oldSegs != null) {
+ if (foundNonNullVersion) {
+ versionString = "versions=[" + oldSegs + " .. " + newest + "]";
+ } else {
+ versionString = "version=" + oldSegs;
+ }
+ } else {
+ versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
+ }
+
+ msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments
+ + " " + versionString + " format=" + sFormat + userDataString);
+
+ if (onlySegments != null) {
+ result.partial = true;
+ if (infoStream != null)
+ infoStream.print("\nChecking only these segments:");
+ for (String s : onlySegments) {
+ if (infoStream != null)
+ infoStream.print(" " + s);
+ }
+ result.segmentsChecked.addAll(onlySegments);
+ msg(":");
+ }
+
+ if (skip) {
+ msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+ result.toolOutOfDate = true;
+ return result;
+ }
+
+
+ result.newSegments = (SegmentInfos) sis.clone();
+ result.newSegments.clear();
+ result.maxSegmentName = -1;
+
+    for(int i=0;i<numSegments;i++) {
+      final SegmentInfo info = sis.info(i);
+      int segmentName = Integer.parseInt(info.name.substring(1), Character.MAX_RADIX);
+      if (segmentName > result.maxSegmentName) {
+        result.maxSegmentName = segmentName;
+      }
+ if (onlySegments != null && !onlySegments.contains(info.name))
+ continue;
+ Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
+ result.segmentInfos.add(segInfoStat);
+ msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ segInfoStat.name = info.name;
+ segInfoStat.docCount = info.docCount;
+
+ int toLoseDocCount = info.docCount;
+
+ SegmentReader reader = null;
+
+ try {
+ msg(" compound=" + info.getUseCompoundFile());
+ segInfoStat.compound = info.getUseCompoundFile();
+ msg(" hasProx=" + info.getHasProx());
+ segInfoStat.hasProx = info.getHasProx();
+ msg(" numFiles=" + info.files().size());
+ segInfoStat.numFiles = info.files().size();
+ segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
+ msg(" size (MB)=" + nf.format(segInfoStat.sizeMB));
+ Map<String,String> diagnostics = info.getDiagnostics();
+ segInfoStat.diagnostics = diagnostics;
+ if (diagnostics.size() > 0) {
+ msg(" diagnostics = " + diagnostics);
+ }
+
+ final int docStoreOffset = info.getDocStoreOffset();
+ if (docStoreOffset != -1) {
+ msg(" docStoreOffset=" + docStoreOffset);
+ segInfoStat.docStoreOffset = docStoreOffset;
+ msg(" docStoreSegment=" + info.getDocStoreSegment());
+ segInfoStat.docStoreSegment = info.getDocStoreSegment();
+ msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
+ segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
+ }
+ final String delFileName = info.getDelFileName();
+ if (delFileName == null){
+ msg(" no deletions");
+ segInfoStat.hasDeletions = false;
+ }
+ else{
+ msg(" has deletions [delFileName=" + delFileName + "]");
+ segInfoStat.hasDeletions = true;
+ segInfoStat.deletionsFileName = delFileName;
+ }
+ if (infoStream != null)
+ infoStream.print(" test: open reader.........");
+ reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
+
+ segInfoStat.openReaderPassed = true;
+
+ final int numDocs = reader.numDocs();
+ toLoseDocCount = numDocs;
+ if (reader.hasDeletions()) {
+ if (reader.deletedDocs.count() != info.getDelCount()) {
+ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
+ }
+ if (reader.deletedDocs.count() > reader.maxDoc()) {
+ throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
+ }
+ if (info.docCount - numDocs != info.getDelCount()){
+ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ segInfoStat.numDeleted = info.docCount - numDocs;
+ msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
+ } else {
+ if (info.getDelCount() != 0) {
+ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ msg("OK");
+ }
+ if (reader.maxDoc() != info.docCount)
+ throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
+
+ // Test getFieldNames()
+ if (infoStream != null) {
+ infoStream.print(" test: fields..............");
+ }
+ Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
+ msg("OK [" + fieldNames.size() + " fields]");
+ segInfoStat.numFields = fieldNames.size();
+
+ // Test Field Norms
+ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
+
+ // Test the Term Index
+ segInfoStat.termIndexStatus = testTermIndex(info, reader);
+
+ // Test Stored Fields
+ segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
+
+ // Test Term Vectors
+ segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+ // Rethrow the first exception we encountered
+ // This will cause stats for failed segments to be incremented properly
+ if (segInfoStat.fieldNormStatus.error != null) {
+ throw new RuntimeException("Field Norm test failed");
+ } else if (segInfoStat.termIndexStatus.error != null) {
+ throw new RuntimeException("Term Index test failed");
+ } else if (segInfoStat.storedFieldStatus.error != null) {
+ throw new RuntimeException("Stored Field test failed");
+ } else if (segInfoStat.termVectorStatus.error != null) {
+ throw new RuntimeException("Term Vector test failed");
+ }
+
+ msg("");
+
+ } catch (Throwable t) {
+ msg("FAILED");
+ String comment;
+ comment = "fixIndex() would remove reference to this segment";
+ msg(" WARNING: " + comment + "; full exception:");
+ if (infoStream != null)
+ t.printStackTrace(infoStream);
+ msg("");
+ result.totLoseDocCount += toLoseDocCount;
+ result.numBadSegments++;
+ continue;
+ } finally {
+ if (reader != null)
+ reader.close();
+ }
+
+ // Keeper
+ result.newSegments.add((SegmentInfo) info.clone());
+ }
+
+ if (0 == result.numBadSegments) {
+ result.clean = true;
+ } else
+ msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
+
+ if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
+ result.clean = false;
+ result.newSegments.counter = result.maxSegmentName + 1;
+ msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
+ }
+
+ if (result.clean) {
+ msg("No problems were detected with this index.\n");
+ }
+
+ return result;
+ }
+
+ /**
+ * Test field norms.
+ */
+ private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
+ final Status.FieldNormStatus status = new Status.FieldNormStatus();
+
+ try {
+ // Test Field Norms
+ if (infoStream != null) {
+ infoStream.print(" test: field norms.........");
+ }
+ final byte[] b = new byte[reader.maxDoc()];
+ for (final String fieldName : fieldNames) {
+ if (reader.hasNorms(fieldName)) {
+ reader.norms(fieldName, b, 0);
+ ++status.totFields;
+ }
+ }
+
+ msg("OK [" + status.totFields + " fields]");
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+
+ return status;
+ }
+
+ /**
+ * Test the term index.
+ */
+ private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
+ final Status.TermIndexStatus status = new Status.TermIndexStatus();
+
+ final IndexSearcher is = new IndexSearcher(reader);
+
+ try {
+ if (infoStream != null) {
+ infoStream.print(" test: terms, freq, prox...");
+ }
+
+ final TermEnum termEnum = reader.terms();
+ final TermPositions termPositions = reader.termPositions();
+
+ // Used only to count up # deleted docs for this term
+ final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
+
+ final int maxDoc = reader.maxDoc();
+ Term lastTerm = null;
+ while (termEnum.next()) {
+ status.termCount++;
+ final Term term = termEnum.term();
+ lastTerm = term;
+
+ final int docFreq = termEnum.docFreq();
+ if (docFreq <= 0) {
+ throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
+ }
+ termPositions.seek(term);
+ int lastDoc = -1;
+ int freq0 = 0;
+ status.totFreq += docFreq;
+ while (termPositions.next()) {
+ freq0++;
+ final int doc = termPositions.doc();
+ final int freq = termPositions.freq();
+ if (doc <= lastDoc)
+ throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ if (doc >= maxDoc)
+ throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+
+ lastDoc = doc;
+ if (freq <= 0)
+ throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+
+ int lastPos = -1;
+ status.totPos += freq;
+          for(int j=0;j<freq;j++) {
+            final int pos = termPositions.nextPosition();
+            if (pos < -1)
+              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+            if (pos < lastPos)
+              throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+            lastPos = pos;
+          }
+        }
+
+        // Now count how many deleted docs occurred in
+        // this term:
+
+        final int delCount;
+        if (reader.hasDeletions()) {
+          myTermDocs.seek(term);
+          while(myTermDocs.next()) { }
+          delCount = myTermDocs.delCount;
+        } else {
+          delCount = 0;
+        }
+
+        if (freq0 + delCount != docFreq) {
+          throw new RuntimeException("term " + term + " docFreq=" +
+                                     docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+        }
+      }
+
+      msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
+
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    } finally {
+      is.close();
+    }
+
+    return status;
+  }
+
+  /**
+   * Test stored fields for a segment.
+   */
+  private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+    final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
+
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: stored fields.......");
+      }
+
+      // Scan stored fields for all documents
+      for (int j = 0; j < info.docCount; ++j) {
+        if (!reader.isDeleted(j)) {
+          status.docCount++;
+          Document doc = reader.document(j);
+          status.totFields += doc.getFields().size();
+        }
+      }
+
+      // Validate docCount
+      if (status.docCount != reader.numDocs()) {
+        throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
+      }
+
+      msg("OK [" + status.totFields + " total field count; avg " +
+          format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
+
+  /**
+   * Test term vectors for a segment.
+   */
+  private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
+    final Status.TermVectorStatus status = new Status.TermVectorStatus();
+
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: term vectors........");
+      }
+
+      for (int j = 0; j < info.docCount; ++j) {
+        if (!reader.isDeleted(j)) {
+          status.docCount++;
+          TermFreqVector[] tfv = reader.getTermFreqVectors(j);
+          if (tfv != null) {
+            status.totVectors += tfv.length;
+          }
+        }
+      }
+
+      msg("OK [" + status.totVectors + " total vector count; avg " +
+          format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+
+    return status;
+  }
+
+  /** Repairs the index using previously returned result
+   *  from {@link #checkIndex}.  Note that this does not
+   *  remove any of the unreferenced files after it's done;
+   *  you must separately open an {@link IndexWriter}, which
+   *  deletes unreferenced files when it's created.
+   *
+   *  <p><b>WARNING</b>: this writes a
+ * new segments file into the index, effectively removing
+ * all documents in broken segments from the index.
+ * BE CAREFUL.
+ *
+ * WARNING: Make sure you only call this when the
+ * index is not opened by any writer. */
+ public void fixIndex(Status result) throws IOException {
+ if (result.partial)
+ throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
+ result.newSegments.changed();
+ result.newSegments.commit(result.dir);
+ }
+
+ private static boolean assertsOn;
+
+ private static boolean testAsserts() {
+ assertsOn = true;
+ return true;
+ }
+
+ private static boolean assertsOn() {
+ assert testAsserts();
+ return assertsOn;
+ }
+
+  /** Command-line interface to check and fix an index.
+
+    <p>
+    Run it like this:
+
+    <pre>
+    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+    </pre>
+
+    <ul>
+    <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
+
+    <li><code>-segment X</code>: only check the specified
+    segment(s).  This can be specified multiple times,
+    to check more than one segment, eg <code>-segment _2
+    -segment _a</code>.  You can't use this with the -fix
+    option.
+    </ul>
+
+    <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
+                       documents (perhaps many) to be permanently removed from the index.  Always make
+                       a backup copy of your index before running this!  Do not run this tool on an index
+                       that is actively being written to.  You have been warned!
+
+    <p>                Run without -fix, this tool will open the index, report version information
+                       and report any exceptions it hits and what action it would take if -fix were
+                       specified.  With -fix, this tool will remove any segments that have issues and
+                       write a new segments_N file.  This means all documents contained in the affected
+                       segments will be removed.
+
+    <p>
+                       This tool exits with exit code 1 if the index cannot be opened or has any
+                       corruption, else 0.
+   */
+ public static void main(String[] args) throws IOException, InterruptedException {
+
+ boolean doFix = false;
+ List<String> onlySegments = new ArrayList<String>();
+ String indexPath = null;
+ int i = 0;
+ while(i < args.length) {
+ if (args[i].equals("-fix")) {
+ doFix = true;
+ i++;
+ } else if (args[i].equals("-segment")) {
+ if (i == args.length-1) {
+ System.out.println("ERROR: missing name for -segment option");
+ System.exit(1);
+ }
+ onlySegments.add(args[i+1]);
+ i += 2;
+ } else {
+ if (indexPath != null) {
+ System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
+ System.exit(1);
+ }
+ indexPath = args[i];
+ i++;
+ }
+ }
+
+ if (indexPath == null) {
+ System.out.println("\nERROR: index path not specified");
+ System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
+ "\n" +
+ " -fix: actually write a new segments_N file, removing any problematic segments\n" +
+ " -segment X: only check the specified segments. This can be specified multiple\n" +
+ " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
+ " You can't use this with the -fix option\n" +
+ "\n" +
+ "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
+ "documents (perhaps many) to be permanently removed from the index. Always make\n" +
+ "a backup copy of your index before running this! Do not run this tool on an index\n" +
+ "that is actively being written to. You have been warned!\n" +
+ "\n" +
+ "Run without -fix, this tool will open the index, report version information\n" +
+ "and report any exceptions it hits and what action it would take if -fix were\n" +
+ "specified. With -fix, this tool will remove any segments that have issues and\n" +
+ "write a new segments_N file. This means all documents contained in the affected\n" +
+ "segments will be removed.\n" +
+ "\n" +
+ "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
+ "corruption, else 0.\n");
+ System.exit(1);
+ }
+
+ if (!assertsOn())
+ System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
+
+ if (onlySegments.size() == 0)
+ onlySegments = null;
+ else if (doFix) {
+ System.out.println("ERROR: cannot specify both -fix and -segment");
+ System.exit(1);
+ }
+
+ System.out.println("\nOpening index @ " + indexPath + "\n");
+ Directory dir = null;
+ try {
+ dir = FSDirectory.open(new File(indexPath));
+ } catch (Throwable t) {
+ System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ t.printStackTrace(System.out);
+ System.exit(1);
+ }
+
+ CheckIndex checker = new CheckIndex(dir);
+ checker.setInfoStream(System.out);
+
+ Status result = checker.checkIndex(onlySegments);
+ if (result.missingSegments) {
+ System.exit(1);
+ }
+
+ if (!result.clean) {
+ if (!doFix) {
+ System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ } else {
+ System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ for(int s=0;s<5;s++) {
+ Thread.sleep(1000);
+ System.out.println(" " + (5-s) + "...");
+ }
+ System.out.println("Writing...");
+ checker.fixIndex(result);
+ System.out.println("OK");
+ System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
+ }
+ }
+ System.out.println("");
+
+ final int exitCode;
+ if (result.clean == true)
+ exitCode = 0;
+ else
+ exitCode = 1;
+ System.exit(exitCode);
+ }
+}