X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/SegmentInfo.java diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/SegmentInfo.java deleted file mode 100644 index 5fb6537..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ /dev/null @@ -1,871 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.BitVector; -import org.apache.lucene.util.Constants; - -import java.io.IOException; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.HashMap; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Set; - -/** - * Information about a segment such as it's name, directory, and files related - * to the segment. - * - * @lucene.experimental - */ -public final class SegmentInfo implements Cloneable { - - static final int NO = -1; // e.g. no norms; no deletes; - static final int YES = 1; // e.g. have norms; have deletes; - static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. - - public String name; // unique name in dir - public int docCount; // number of docs in seg - public Directory dir; // where segment resides - - private boolean preLockless; // true if this is a segments file written before - // lock-less commits (2.1) - - private long delGen; // current generation of del file; NO if there - // are no deletes; CHECK_DIR if it's a pre-2.1 segment - // (and we must check filesystem); YES or higher if - // there are deletes at generation N - - private long[] normGen; // current generation of each field's norm file. - // If this array is null, for lockLess this means no - // separate norms. For preLockLess this means we must - // check filesystem. If this array is not null, its - // values mean: NO says this field has no separate - // norms; CHECK_DIR says it is a preLockLess segment and - // filesystem must be checked; >= YES says this field - // has separate norms with the specified generation - - private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's - // pre-2.1 (ie, must check file system to see - // if .cfs and .nrm exist) - - private boolean hasSingleNormFile; // true if this segment maintains norms in a single file; - // false otherwise - // this is currently false for segments populated by DocumentWriter - // and true for newly created merged segments (both - // compound and non compound). - - private volatile List files; // cached list of files that this segment uses - // in the Directory - - private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) - private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) - - private int docStoreOffset; // if this segment shares stored fields & vectors, this - // offset is where in that file this segment's docs begin - private String docStoreSegment; // name used to derive fields/vectors file we share with - // other segments - private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) - - private int delCount; // How many deleted docs in this segment, or -1 if not yet known - // (if it's an older index) - - private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false - - private boolean hasVectors; // True if this segment wrote term vectors - - private Map diagnostics; - - // Tracks the Lucene version this segment was created with, since 3.1. The - // format expected is "x.y" - "2.x" for pre-3.0 indexes, and specific versions - // afterwards ("3.0", "3.1" etc.). - // see Constants.LUCENE_MAIN_VERSION. - private String version; - - // NOTE: only used in-RAM by IW to track buffered deletes; - // this is never written to/read from the Directory - private long bufferedDeletesGen; - - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, - boolean hasProx, boolean hasVectors) { - this.name = name; - this.docCount = docCount; - this.dir = dir; - delGen = NO; - this.isCompoundFile = (byte) (isCompoundFile ? YES : NO); - preLockless = false; - this.hasSingleNormFile = hasSingleNormFile; - this.docStoreOffset = -1; - delCount = 0; - this.hasProx = hasProx; - this.hasVectors = hasVectors; - this.version = Constants.LUCENE_MAIN_VERSION; - } - - /** - * Copy everything from src SegmentInfo into our instance. - */ - void reset(SegmentInfo src) { - clearFiles(); - version = src.version; - name = src.name; - docCount = src.docCount; - dir = src.dir; - preLockless = src.preLockless; - delGen = src.delGen; - docStoreOffset = src.docStoreOffset; - docStoreIsCompoundFile = src.docStoreIsCompoundFile; - hasVectors = src.hasVectors; - hasProx = src.hasProx; - if (src.normGen == null) { - normGen = null; - } else { - normGen = new long[src.normGen.length]; - System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); - } - isCompoundFile = src.isCompoundFile; - hasSingleNormFile = src.hasSingleNormFile; - delCount = src.delCount; - } - - void setDiagnostics(Map diagnostics) { - this.diagnostics = diagnostics; - } - - public Map getDiagnostics() { - return diagnostics; - } - - /** - * Construct a new SegmentInfo instance by reading a - * previously saved SegmentInfo from input. - * - * @param dir directory to load from - * @param format format of the segments info file - * @param input input handle to read segment info from - */ - SegmentInfo(Directory dir, int format, IndexInput input) throws IOException { - this.dir = dir; - if (format <= SegmentInfos.FORMAT_3_1) { - version = input.readString(); - } - name = input.readString(); - docCount = input.readInt(); - if (format <= SegmentInfos.FORMAT_LOCKLESS) { - delGen = input.readLong(); - if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { - docStoreOffset = input.readInt(); - if (docStoreOffset != -1) { - docStoreSegment = input.readString(); - docStoreIsCompoundFile = (1 == input.readByte()); - } else { - docStoreSegment = name; - docStoreIsCompoundFile = false; - } - } else { - docStoreOffset = -1; - docStoreSegment = name; - docStoreIsCompoundFile = false; - } - if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { - hasSingleNormFile = (1 == input.readByte()); - } else { - hasSingleNormFile = false; - } - int numNormGen = input.readInt(); - if (numNormGen == NO) { - normGen = null; - } else { - normGen = new long[numNormGen]; - for(int j=0;jemptyMap(); - } - - if (format <= SegmentInfos.FORMAT_HAS_VECTORS) { - hasVectors = input.readByte() == 1; - } else { - final String storesSegment; - final String ext; - final boolean isCompoundFile; - if (docStoreOffset != -1) { - storesSegment = docStoreSegment; - isCompoundFile = docStoreIsCompoundFile; - ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; - } else { - storesSegment = name; - isCompoundFile = getUseCompoundFile(); - ext = IndexFileNames.COMPOUND_FILE_EXTENSION; - } - final Directory dirToTest; - if (isCompoundFile) { - dirToTest = new CompoundFileReader(dir, IndexFileNames.segmentFileName(storesSegment, ext)); - } else { - dirToTest = dir; - } - try { - hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, IndexFileNames.VECTORS_INDEX_EXTENSION)); - } finally { - if (isCompoundFile) { - dirToTest.close(); - } - } - } - } else { - delGen = CHECK_DIR; - normGen = null; - isCompoundFile = CHECK_DIR; - preLockless = true; - hasSingleNormFile = false; - docStoreOffset = -1; - docStoreIsCompoundFile = false; - docStoreSegment = null; - delCount = -1; - hasProx = true; - diagnostics = Collections.emptyMap(); - } - } - - void setNumFields(int numFields) { - if (normGen == null) { - // normGen is null if we loaded a pre-2.1 segment - // file, or, if this segments file hasn't had any - // norms set against it yet: - normGen = new long[numFields]; - - if (preLockless) { - // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know - // we have to check filesystem for norm files, because this is prelockless. - - } else { - // This is a FORMAT_LOCKLESS segment, which means - // there are no separate norms: - for(int i=0;i= YES: this means this segment was written by - // the LOCKLESS code and for certain has - // deletions - // - if (delGen == NO) { - return false; - } else if (delGen >= YES) { - return true; - } else { - return dir.fileExists(getDelFileName()); - } - } - - void advanceDelGen() { - // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == NO) { - delGen = YES; - } else { - delGen++; - } - clearFiles(); - } - - void clearDelGen() { - delGen = NO; - clearFiles(); - } - - @Override - public Object clone() { - SegmentInfo si = new SegmentInfo(name, docCount, dir, false, hasSingleNormFile, - hasProx, hasVectors); - si.docStoreOffset = docStoreOffset; - si.docStoreSegment = docStoreSegment; - si.docStoreIsCompoundFile = docStoreIsCompoundFile; - si.delGen = delGen; - si.delCount = delCount; - si.preLockless = preLockless; - si.isCompoundFile = isCompoundFile; - si.diagnostics = new HashMap(diagnostics); - if (normGen != null) { - si.normGen = normGen.clone(); - } - si.version = version; - return si; - } - - public String getDelFileName() { - if (delGen == NO) { - // In this case we know there is no deletion filename - // against this segment - return null; - } else { - // If delGen is CHECK_DIR, it's the pre-lockless-commit file format - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); - } - } - - /** - * Returns true if this field for this segment has saved a separate norms file (__N.sX). - * - * @param fieldNumber the field index to check - */ - public boolean hasSeparateNorms(int fieldNumber) - throws IOException { - if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) { - // Must fallback to directory file exists check: - String fileName = name + ".s" + fieldNumber; - return dir.fileExists(fileName); - } else if (normGen == null || normGen[fieldNumber] == NO) { - return false; - } else { - return true; - } - } - - /** - * Returns true if any fields in this segment have separate norms. - */ - public boolean hasSeparateNorms() - throws IOException { - if (normGen == null) { - if (!preLockless) { - // This means we were created w/ LOCKLESS code and no - // norms are written yet: - return false; - } else { - // This means this segment was saved with pre-LOCKLESS - // code. So we must fallback to the original - // directory list check: - String[] result = dir.listAll(); - if (result == null) - throw new IOException("cannot read directory " + dir + ": listAll() returned null"); - - final IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); - String pattern; - pattern = name + ".s"; - int patternLength = pattern.length(); - for(int i = 0; i < result.length; i++){ - String fileName = result[i]; - if (filter.accept(null, fileName) && fileName.startsWith(pattern) && Character.isDigit(fileName.charAt(patternLength))) - return true; - } - return false; - } - } else { - // This means this segment was saved with LOCKLESS - // code so we first check whether any normGen's are >= 1 - // (meaning they definitely have separate norms): - for(int i=0;i= YES) { - return true; - } - } - // Next we look for any == 0. These cases were - // pre-LOCKLESS and must be checked in directory: - for(int i=0;i").append(docStoreSegment); - if (docStoreIsCompoundFile) { - s.append('c'); - } else { - s.append('C'); - } - s.append('+').append(docStoreOffset); - } - - return s.toString(); - } - - /** We consider another SegmentInfo instance equal if it - * has the same dir and same name. */ - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj instanceof SegmentInfo) { - final SegmentInfo other = (SegmentInfo) obj; - return other.dir == dir && other.name.equals(name); - } else { - return false; - } - } - - @Override - public int hashCode() { - return dir.hashCode() + name.hashCode(); - } - - /** - * Used by SegmentInfos to upgrade segments that do not record their code - * version (either "2.x" or "3.0"). - *

- * NOTE: this method is used for internal purposes only - you should - * not modify the version of a SegmentInfo, or it may result in unexpected - * exceptions thrown when you attempt to open the index. - * - * @lucene.internal - */ - void setVersion(String version) { - this.version = version; - } - - /** Returns the version of the code which wrote the segment. */ - public String getVersion() { - return version; - } - - long getBufferedDeletesGen() { - return bufferedDeletesGen; - } - - void setBufferedDeletesGen(long v) { - bufferedDeletesGen = v; - } -}