1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.store.Directory;
21 import org.apache.lucene.store.IndexOutput;
22 import org.apache.lucene.store.IndexInput;
23 import org.apache.lucene.util.BitVector;
24 import org.apache.lucene.util.Constants;
26 import java.io.IOException;
27 import java.util.HashSet;
28 import java.util.List;
30 import java.util.HashMap;
31 import java.util.ArrayList;
32 import java.util.Collections;
36 * Information about a segment such as its name, directory, and files related
39 * @lucene.experimental
41 public final class SegmentInfo implements Cloneable {
43 static final int NO = -1; // e.g. no norms; no deletes;
44 static final int YES = 1; // e.g. have norms; have deletes;
45 static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
46 static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
48 public String name; // unique name in dir
49 public int docCount; // number of docs in seg
50 public Directory dir; // where segment resides
52 private boolean preLockless; // true if this is a segments file written before
53 // lock-less commits (2.1)
55 private long delGen; // current generation of del file; NO if there
56 // are no deletes; CHECK_DIR if it's a pre-2.1 segment
57 // (and we must check filesystem); YES or higher if
58 // there are deletes at generation N
60 private long[] normGen; // current generation of each field's norm file.
61 // If this array is null, for lockLess this means no
62 // separate norms. For preLockLess this means we must
63 // check filesystem. If this array is not null, its
64 // values mean: NO says this field has no separate
65 // norms; CHECK_DIR says it is a preLockLess segment and
66 // filesystem must be checked; >= YES says this field
67 // has separate norms with the specified generation
69 private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
70 // pre-2.1 (ie, must check file system to see
71 // if <name>.cfs and <name>.nrm exist)
73 private boolean hasSingleNormFile; // true if this segment maintains norms in a single file;
75 // this is currently false for segments populated by DocumentWriter
76 // and true for newly created merged segments (both
77 // compound and non compound).
79 private volatile List<String> files; // cached list of files that this segment uses
82 private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
83 private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
85 private int docStoreOffset; // if this segment shares stored fields & vectors, this
86 // offset is where in that file this segment's docs begin
87 private String docStoreSegment; // name used to derive fields/vectors file we share with
89 private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
91 private int delCount; // How many deleted docs in this segment, or -1 if not yet known
92 // (if it's an older index)
94 private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
96 private boolean hasVectors; // True if this segment wrote term vectors
98 private Map<String,String> diagnostics;
100 // Tracks the Lucene version this segment was created with, since 3.1. The
101 // format expected is "x.y" - "2.x" for pre-3.0 indexes, and specific versions
102 // afterwards ("3.0", "3.1" etc.).
103 // see Constants.LUCENE_MAIN_VERSION.
104 private String version;
106 // NOTE: only used in-RAM by IW to track buffered deletes;
107 // this is never written to/read from the Directory
108 private long bufferedDeletesGen;
/**
 * Creates a SegmentInfo from explicitly supplied values.
 * The compound-file flag is stored as YES/NO, docStoreOffset starts at -1 and
 * the segment is stamped with Constants.LUCENE_MAIN_VERSION.
 */
110 public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
111 boolean hasProx, boolean hasVectors) {
113 this.docCount = docCount;
// The boolean is encoded into the byte-valued tri-state field (YES / NO).
116 this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
118 this.hasSingleNormFile = hasSingleNormFile;
// -1 is the value this class tests against (docStoreOffset != -1) to detect a shared doc store.
119 this.docStoreOffset = -1;
121 this.hasProx = hasProx;
122 this.hasVectors = hasVectors;
123 this.version = Constants.LUCENE_MAIN_VERSION;
127 * Copy everything from src SegmentInfo into our instance.
129 void reset(SegmentInfo src) {
// Field-by-field copy of src's state into this instance.
131 version = src.version;
133 docCount = src.docCount;
135 preLockless = src.preLockless;
137 docStoreOffset = src.docStoreOffset;
138 docStoreIsCompoundFile = src.docStoreIsCompoundFile;
139 hasVectors = src.hasVectors;
140 hasProx = src.hasProx;
141 if (src.normGen == null) {
// Copy the generations into a fresh array so the two instances do not share one long[].
144 normGen = new long[src.normGen.length];
145 System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
147 isCompoundFile = src.isCompoundFile;
148 hasSingleNormFile = src.hasSingleNormFile;
149 delCount = src.delCount;
/**
 * Replaces this segment's diagnostics map with the given one.
 * The reference is stored as-is; no copy is made.
 */
152 void setDiagnostics(Map<String, String> diagnostics) {
153 this.diagnostics = diagnostics;
/**
 * Accessor for this segment's diagnostics map.
 * NOTE(review): the body is not visible here — presumably returns the diagnostics field; confirm.
 */
156 public Map<String, String> getDiagnostics() {
161 * Construct a new SegmentInfo instance by reading a
162 * previously saved SegmentInfo from input.
164 * @param dir directory to load from
165 * @param format format of the segments info file
166 * @param input input handle to read segment info from
// NOTE(review): every format test below uses "<=", so newer format constants are
// presumably smaller (more negative) than older ones — confirm in SegmentInfos.
168 SegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
// The creating version string is only read when format <= FORMAT_3_1.
170 if (format <= SegmentInfos.FORMAT_3_1) {
171 version = input.readString();
173 name = input.readString();
174 docCount = input.readInt();
175 if (format <= SegmentInfos.FORMAT_LOCKLESS) {
176 delGen = input.readLong();
177 if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
178 docStoreOffset = input.readInt();
// A doc store segment name and compound flag are only stored when the offset is not -1.
179 if (docStoreOffset != -1) {
180 docStoreSegment = input.readString();
181 docStoreIsCompoundFile = (1 == input.readByte());
// Otherwise the doc store defaults to this segment's own name, non-compound.
183 docStoreSegment = name;
184 docStoreIsCompoundFile = false;
188 docStoreSegment = name;
189 docStoreIsCompoundFile = false;
191 if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
192 hasSingleNormFile = (1 == input.readByte());
194 hasSingleNormFile = false;
// numNormGen == NO signals that no per-field generations follow in the stream.
196 int numNormGen = input.readInt();
197 if (numNormGen == NO) {
200 normGen = new long[numNormGen];
201 for(int j=0;j<numNormGen;j++) {
202 normGen[j] = input.readLong();
205 isCompoundFile = input.readByte();
// A stored CHECK_DIR value for the compound flag is what marks the segment as pre-lockless.
206 preLockless = (isCompoundFile == CHECK_DIR);
207 if (format <= SegmentInfos.FORMAT_DEL_COUNT) {
208 delCount = input.readInt();
209 assert delCount <= docCount;
212 if (format <= SegmentInfos.FORMAT_HAS_PROX)
213 hasProx = input.readByte() == 1;
217 if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) {
218 diagnostics = input.readStringStringMap();
220 diagnostics = Collections.<String,String>emptyMap();
223 if (format <= SegmentInfos.FORMAT_HAS_VECTORS) {
224 hasVectors = input.readByte() == 1;
// NOTE(review): what follows looks like the fallback for formats without a stored
// hasVectors byte — vectors are detected by probing for the vectors index file. Confirm.
226 final String storesSegment;
228 final boolean isCompoundFile;
229 if (docStoreOffset != -1) {
230 storesSegment = docStoreSegment;
231 isCompoundFile = docStoreIsCompoundFile;
232 ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
234 storesSegment = name;
235 isCompoundFile = getUseCompoundFile();
236 ext = IndexFileNames.COMPOUND_FILE_EXTENSION;
238 final Directory dirToTest;
// When the stores live in a compound file, the probe is made inside that compound file.
239 if (isCompoundFile) {
240 dirToTest = new CompoundFileReader(dir, IndexFileNames.segmentFileName(storesSegment, ext));
245 hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, IndexFileNames.VECTORS_INDEX_EXTENSION));
247 if (isCompoundFile) {
// NOTE(review): these defaults appear to belong to the branch for formats older than
// FORMAT_LOCKLESS (everything must be checked in the directory) — confirm.
255 isCompoundFile = CHECK_DIR;
257 hasSingleNormFile = false;
259 docStoreIsCompoundFile = false;
260 docStoreSegment = null;
263 diagnostics = Collections.<String,String>emptyMap();
/**
 * Allocates normGen with one slot per field when it has not been allocated yet.
 * A freshly allocated long[] is all zeros, i.e. every slot equals CHECK_DIR.
 *
 * @param numFields number of fields, used as the length of the new array
 */
267 void setNumFields(int numFields) {
268 if (normGen == null) {
269 // normGen is null if we loaded a pre-2.1 segment
270 // file, or, if this segments file hasn't had any
271 // norms set against it yet:
272 normGen = new long[numFields];
275 // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
276 // we have to check filesystem for norm files, because this is prelockless.
279 // This is a FORMAT_LOCKLESS segment, which means
280 // there are no separate norms:
281 for(int i=0;i<numFields;i++) {
289 * Returns total size in bytes of all files used by this segment (if
290 * {@code includeDocStores} is true), or the size of all files except the store
293 public long sizeInBytes(boolean includeDocStores) throws IOException {
294 if (includeDocStores) {
// -1 means "not computed yet"; any other value is the cached total.
295 if (sizeInBytesWithStore != -1) {
296 return sizeInBytesWithStore;
299 for (final String fileName : files()) {
300 // We don't count bytes used by a shared doc store
301 // against this segment
302 if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName)) {
303 sum += dir.fileLength(fileName);
// Remember the total so later calls can skip the directory lookups.
306 sizeInBytesWithStore = sum;
307 return sizeInBytesWithStore;
// Same caching scheme for the total that excludes doc store files.
309 if (sizeInBytesNoStore != -1) {
310 return sizeInBytesNoStore;
313 for (final String fileName : files()) {
314 if (IndexFileNames.isDocStoreFile(fileName)) {
317 sum += dir.fileLength(fileName);
319 sizeInBytesNoStore = sum;
320 return sizeInBytesNoStore;
/**
 * Accessor for the term-vectors flag of this segment.
 * NOTE(review): the body is not visible here — presumably returns the hasVectors field; confirm.
 */
324 public boolean getHasVectors() throws IOException {
/**
 * Mutator for the term-vectors flag of this segment.
 * NOTE(review): the body is not visible here — presumably assigns hasVectors; confirm
 * whether it also invalidates the cached file list.
 *
 * @param v new value of the flag
 */
328 public void setHasVectors(boolean v) {
/**
 * Reports whether this segment has deletions, driven by the value of delGen
 * (see the case analysis below). When delGen does not settle the question, the
 * directory is asked whether the file named by getDelFileName() exists.
 */
333 public boolean hasDeletions()
337 // delGen == NO: this means this segment was written
338 // by the LOCKLESS code and for certain does not have
341 // delGen == CHECK_DIR: this means this segment was written by
342 // pre-LOCKLESS code which means we must check
343 // directory to see if .del file exists
345 // delGen >= YES: this means this segment was written by
346 // the LOCKLESS code and for certain has
351 } else if (delGen >= YES) {
354 return dir.fileExists(getDelFileName());
/**
 * Moves delGen on to its next generation.
 * NOTE(review): the statements are not visible here; per the comment below, generation 0
 * is reserved, so the advance presumably skips it — confirm.
 */
358 void advanceDelGen() {
359 // delGen 0 is reserved for pre-LOCKLESS format
/**
 * Builds a new SegmentInfo through the public constructor, then copies the remaining
 * state across field by field.
 */
374 public Object clone() {
// Constructed with isCompoundFile=false; the real tri-state value is copied a few lines below.
375 SegmentInfo si = new SegmentInfo(name, docCount, dir, false, hasSingleNormFile,
376 hasProx, hasVectors);
377 si.docStoreOffset = docStoreOffset;
378 si.docStoreSegment = docStoreSegment;
379 si.docStoreIsCompoundFile = docStoreIsCompoundFile;
381 si.delCount = delCount;
382 si.preLockless = preLockless;
383 si.isCompoundFile = isCompoundFile;
// The clone gets its own diagnostics map rather than sharing ours.
// NOTE(review): throws NullPointerException if diagnostics is null; the visible lines of the
// public constructor do not assign it — confirm callers always set diagnostics first.
384 si.diagnostics = new HashMap<String, String>(diagnostics);
385 if (normGen != null) {
// Likewise an independent copy of the generations array.
386 si.normGen = normGen.clone();
388 si.version = version;
/**
 * Builds the deletes file name for this segment from its name, the deletes extension
 * and the current delGen.
 * NOTE(review): the comments below describe a "no deletion file" case handled before the
 * final return; that branch is not visible here — confirm what it returns.
 */
392 public String getDelFileName() {
394 // In this case we know there is no deletion filename
395 // against this segment
398 // If delGen is CHECK_DIR, it's the pre-lockless-commit file format
399 return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
404 * Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
406 * @param fieldNumber the field index to check
408 public boolean hasSeparateNorms(int fieldNumber)
// Generation unknown (pre-lockless segment with no array, or an explicit CHECK_DIR entry):
// only the directory can tell, so look for the generation-less "<name>.s<fieldNumber>" file.
410 if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
411 // Must fallback to directory file exists check:
412 String fileName = name + ".s" + fieldNumber;
413 return dir.fileExists(fileName);
// Not pre-lockless and either no array at all or an explicit NO for this field.
414 } else if (normGen == null || normGen[fieldNumber] == NO) {
422 * Returns true if any fields in this segment have separate norms.
424 public boolean hasSeparateNorms()
426 if (normGen == null) {
428 // This means we were created w/ LOCKLESS code and no
429 // norms are written yet:
432 // This means this segment was saved with pre-LOCKLESS
433 // code. So we must fallback to the original
434 // directory list check:
435 String[] result = dir.listAll();
437 throw new IOException("cannot read directory " + dir + ": listAll() returned null");
439 final IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
// Looking for files named "<name>.s" followed by a digit.
441 pattern = name + ".s";
442 int patternLength = pattern.length();
443 for(int i = 0; i < result.length; i++){
444 String fileName = result[i];
// NOTE(review): charAt(patternLength) has no visible length guard; a file named exactly
// "<name>.s" that the filter accepts would raise StringIndexOutOfBoundsException.
// files() checks fileName.length() > prefixLength before its charAt — consider the same here.
445 if (filter.accept(null, fileName) && fileName.startsWith(pattern) && Character.isDigit(fileName.charAt(patternLength)))
451 // This means this segment was saved with LOCKLESS
452 // code so we first check whether any normGen's are >= 1
453 // (meaning they definitely have separate norms):
454 for(int i=0;i<normGen.length;i++) {
455 if (normGen[i] >= YES) {
459 // Next we look for any == 0. These cases were
460 // pre-LOCKLESS and must be checked in directory:
461 for(int i=0;i<normGen.length;i++) {
462 if (normGen[i] == CHECK_DIR) {
// Delegates to the per-field overload, which performs the directory lookup.
463 if (hasSeparateNorms(i)) {
474 * Increment the generation count for the norms file for
477 * @param fieldIndex field whose norm file will be rewritten
479 void advanceNormGen(int fieldIndex) {
// NO (-1) jumps straight to YES (1), so the generation never lands on CHECK_DIR (0).
480 if (normGen[fieldIndex] == NO) {
481 normGen[fieldIndex] = YES;
// Any other value is simply incremented.
483 normGen[fieldIndex]++;
489 * Get the file name for the norms file for this field.
491 * @param number field index
493 public String getNormFileName(int number) throws IOException {
// The generation is taken from normGen[number] when the array exists.
495 if (normGen == null) {
498 gen = normGen[number];
// The cases below are tried in order: separate norms file, single shared norms file,
// then one plain norms file per field.
501 if (hasSeparateNorms(number)) {
502 // case 1: separate norm
503 return IndexFileNames.fileNameFromGeneration(name, "s" + number, gen);
506 if (hasSingleNormFile) {
507 // case 2: lockless (or nrm file exists) - single file for all norms
508 return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
511 // case 3: norm file for each field
512 return IndexFileNames.fileNameFromGeneration(name, "f" + number, WITHOUT_GEN);
516 * Mark whether this segment is stored as a compound file.
518 * @param isCompoundFile true if this is a compound file;
521 void setUseCompoundFile(boolean isCompoundFile) {
// The boolean is mapped onto the tri-state field: true -> YES, false -> NO.
522 if (isCompoundFile) {
523 this.isCompoundFile = YES;
525 this.isCompoundFile = NO;
531 * Returns true if this segment is stored as a compound
534 public boolean getUseCompoundFile() throws IOException {
// NO and YES are answered from the recorded flag.
535 if (isCompoundFile == NO) {
537 } else if (isCompoundFile == YES) {
// Neither YES nor NO is recorded: ask the directory whether the compound file exists.
540 return dir.fileExists(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION));
/**
 * Returns the number of deleted documents, computing it on first use when it is
 * still unknown (-1).
 */
544 public int getDelCount() throws IOException {
545 if (delCount == -1) {
546 if (hasDeletions()) {
// Unknown count but deletions exist: load the deletes file and count its set bits,
// storing the result in the delCount field.
547 final String delFileName = getDelFileName();
548 delCount = new BitVector(dir, delFileName).count();
552 assert delCount <= docCount;
/**
 * Records the number of deleted documents for this segment.
 *
 * @param delCount new deletion count; asserted not to exceed docCount
 */
556 void setDelCount(int delCount) {
557 this.delCount = delCount;
// Checked after the assignment; only active when assertions are enabled.
558 assert delCount <= docCount;
/** Returns docStoreOffset; -1 is the value this class treats as "no shared doc store". */
561 public int getDocStoreOffset() {
562 return docStoreOffset;
/** Returns whether the doc store files are kept in a compound file. */
565 public boolean getDocStoreIsCompoundFile() {
566 return docStoreIsCompoundFile;
/**
 * Sets whether the doc store files are kept in a compound file.
 *
 * @param v new value of the flag
 */
569 void setDocStoreIsCompoundFile(boolean v) {
570 docStoreIsCompoundFile = v;
/** Returns the name of the segment whose doc store files this segment uses. */
574 public String getDocStoreSegment() {
575 return docStoreSegment;
/**
 * Sets the name of the segment whose doc store files this segment uses.
 *
 * @param segment doc store segment name
 */
578 public void setDocStoreSegment(String segment) {
579 docStoreSegment = segment;
/**
 * Sets docStoreOffset.
 *
 * @param offset new offset; -1 is the value this class treats as "no shared doc store"
 */
582 void setDocStoreOffset(int offset) {
583 docStoreOffset = offset;
/**
 * Sets all three doc store attributes in one call.
 *
 * @param offset value for docStoreOffset
 * @param segment value for docStoreSegment
 * @param isCompoundFile value for docStoreIsCompoundFile
 */
587 void setDocStore(int offset, String segment, boolean isCompoundFile) {
588 docStoreOffset = offset;
589 docStoreSegment = segment;
590 docStoreIsCompoundFile = isCompoundFile;
595 * Save this segment's info.
597 void write(IndexOutput output)
599 assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
// Fields go out in the same order the (Directory, int, IndexInput) constructor reads them.
600 // Write the Lucene version that created this segment, since 3.1
601 output.writeString(version);
602 output.writeString(name);
603 output.writeInt(docCount);
604 output.writeLong(delGen);
605 output.writeInt(docStoreOffset);
// Doc store name and compound flag are only written when the offset is not -1.
606 if (docStoreOffset != -1) {
607 output.writeString(docStoreSegment);
608 output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
611 output.writeByte((byte) (hasSingleNormFile ? 1:0));
612 if (normGen == null) {
// Otherwise: the array length followed by one long per field.
615 output.writeInt(normGen.length);
616 for(int j = 0; j < normGen.length; j++) {
617 output.writeLong(normGen[j]);
// isCompoundFile is written as its raw tri-state byte; the booleans are written as 1/0.
620 output.writeByte(isCompoundFile);
621 output.writeInt(delCount);
622 output.writeByte((byte) (hasProx ? 1:0));
623 output.writeStringStringMap(diagnostics);
624 output.writeByte((byte) (hasVectors ? 1 : 0));
/**
 * Sets the hasProx flag.
 *
 * @param hasProx new value of the flag
 */
627 void setHasProx(boolean hasProx) {
628 this.hasProx = hasProx;
/**
 * Accessor for the hasProx flag.
 * NOTE(review): the body is not visible here — presumably returns the hasProx field; confirm.
 */
632 public boolean getHasProx() {
/**
 * Helper for building the file list, guarded by a directory existence check.
 * NOTE(review): the guarded statement is not visible here — presumably adds fileName
 * to files; confirm.
 */
636 private void addIfExists(Set<String> files, String fileName) throws IOException {
637 if (dir.fileExists(fileName))
642 * Return all files referenced by this SegmentInfo. The
643 * returned List is a locally cached List so you should not
647 public List<String> files() throws IOException {
// Names are collected in a set, so a file reported by more than one rule appears once.
654 HashSet<String> filesSet = new HashSet<String>();
656 boolean useCompoundFile = getUseCompoundFile();
// 1) Core index files: either the single compound file, or whichever non-store files exist.
658 if (useCompoundFile) {
659 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION));
661 for (String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS)
662 addIfExists(filesSet, IndexFileNames.segmentFileName(name, ext));
// 2) Stored fields / term vectors: named after docStoreSegment when shared,
//    after this segment when private and not inside the compound file.
665 if (docStoreOffset != -1) {
666 // We are sharing doc stores (stored fields, term
667 // vectors) with other segments
668 assert docStoreSegment != null;
669 if (docStoreIsCompoundFile) {
670 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
672 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.FIELDS_INDEX_EXTENSION));
673 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.FIELDS_EXTENSION));
675 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_INDEX_EXTENSION));
676 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
677 filesSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.VECTORS_FIELDS_EXTENSION));
680 } else if (!useCompoundFile) {
681 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.FIELDS_INDEX_EXTENSION));
682 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.FIELDS_EXTENSION));
684 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_INDEX_EXTENSION));
685 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
686 filesSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.VECTORS_FIELDS_EXTENSION));
// 3) Deletes file: included when delGen >= YES, otherwise only if it exists on disk.
690 String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
691 if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
692 filesSet.add(delFileName);
695 // Careful logic for norms files
696 if (normGen != null) {
697 for(int i=0;i<normGen.length;i++) {
698 long gen = normGen[i];
700 // Definitely a separate norm file, with generation:
701 filesSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
702 } else if (NO == gen) {
703 // No separate norms but maybe plain norms
704 // in the non compound file case:
705 if (!hasSingleNormFile && !useCompoundFile) {
706 String fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i);
707 if (dir.fileExists(fileName)) {
708 filesSet.add(fileName);
711 } else if (CHECK_DIR == gen) {
712 // Pre-2.1: we have to check file existence
713 String fileName = null;
714 if (useCompoundFile) {
715 fileName = IndexFileNames.segmentFileName(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i);
716 } else if (!hasSingleNormFile) {
717 fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i);
719 if (fileName != null && dir.fileExists(fileName)) {
720 filesSet.add(fileName);
724 } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) {
725 // Pre-2.1: we have to scan the dir to find all
726 // matching _X.sN/_X.fN files for our segment:
729 prefix = IndexFileNames.segmentFileName(name, IndexFileNames.SEPARATE_NORMS_EXTENSION);
731 prefix = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION);
732 int prefixLength = prefix.length();
733 String[] allFiles = dir.listAll();
734 final IndexFileNameFilter filter = IndexFileNameFilter.getFilter();
735 for(int i=0;i<allFiles.length;i++) {
736 String fileName = allFiles[i];
// The length test comes before charAt(prefixLength), so the index is always in range.
737 if (filter.accept(null, fileName) && fileName.length() > prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) {
738 filesSet.add(fileName);
// Store the result in the files field (the cache) and return that same list.
742 return files = new ArrayList<String>(filesSet);
745 /* Called whenever any change is made that affects which
746 * files this segment has. */
747 private void clearFiles() {
// -1 marks both cached sizes as "not computed", so sizeInBytes recomputes them on next use.
749 sizeInBytesNoStore = -1;
750 sizeInBytesWithStore = -1;
/** Delegates to toString(Directory, int) with this segment's own directory and no pending deletes. */
755 public String toString() {
756 return toString(dir, 0);
759 /** Used for debugging. Format may suddenly change.
761 * <p>Current format looks like
762 * <code>_a(3.1):c45/4->_1</code>, which means the segment's
763 * name is <code>_a</code>; it was created with Lucene 3.1 (or
764 * '?' if it's unknown); it's using compound file
765 * format (would be <code>C</code> if not compound); it
766 * has 45 documents; it has 4 deletions (this part is
767 * left off when there are no deletions); it's using the
768 * shared doc stores named <code>_1</code> (this part is
769 * left off if doc stores are private).</p>
771 public String toString(Directory dir, int pendingDelCount) {
773 StringBuilder s = new StringBuilder();
// Segment name, then the creating version in parentheses ('?' when version is null), then ':'.
774 s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
// The compound-file lookup can touch the directory, hence the IOException handling.
778 if (getUseCompoundFile()) {
783 } catch (IOException ioe) {
// Reference comparison: is this segment in a different directory than the one passed in?
788 if (this.dir != dir) {
798 delCount = getDelCount();
799 } catch (IOException ioe) {
// Pending deletes are only added when the count is known (not -1).
802 if (delCount != -1) {
803 delCount += pendingDelCount;
807 if (delCount == -1) {
// Shared doc store: append "->" and the doc store segment name, later '+' and the offset.
814 if (docStoreOffset != -1) {
815 s.append("->").append(docStoreSegment);
816 if (docStoreIsCompoundFile) {
821 s.append('+').append(docStoreOffset);
827 /** We consider another SegmentInfo instance equal if it
828 * has the same dir and same name. */
830 public boolean equals(Object obj) {
831 if (this == obj) return true;
// instanceof is false for null, so a null argument never reaches the cast.
832 if (obj instanceof SegmentInfo) {
833 final SegmentInfo other = (SegmentInfo) obj;
// dir is compared by reference identity (==); name by String.equals.
834 return other.dir == dir && other.name.equals(name);
/** Hash built from the same two fields equals() compares: dir and name. */
841 public int hashCode() {
842 return dir.hashCode() + name.hashCode();
846 * Used by SegmentInfos to upgrade segments that do not record their code
847 * version (either "2.x" or "3.0").
849 * <b>NOTE:</b> this method is used for internal purposes only - you should
850 * not modify the version of a SegmentInfo, or it may result in unexpected
851 * exceptions thrown when you attempt to open the index.
855 void setVersion(String version) {
// Plain assignment; the value is not validated here.
856 this.version = version;
859 /** Returns the version of the code which wrote the segment. */
// NOTE(review): the body is not visible here — presumably returns the version field; confirm.
860 public String getVersion() {
/** Returns bufferedDeletesGen, an in-RAM-only value that is never written to the Directory. */
864 long getBufferedDeletesGen() {
865 return bufferedDeletesGen;
/**
 * Sets bufferedDeletesGen, an in-RAM-only value that is never written to the Directory.
 *
 * @param v new generation value
 */
868 void setBufferedDeletesGen(long v) {
869 bufferedDeletesGen = v;