--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.StringHelper;
+
+import java.io.IOException;
+import java.util.*;
+
+/** Access to the Field Info file that describes document fields and whether or
+ * not they are indexed. Each segment has a separate Field Info file. Objects
+ * of this class are thread-safe for multiple readers, but only one thread can
+ * be adding documents at a time, with no other reader or writer threads
+ * accessing this object.
+ */
+final class FieldInfos {
+
+ // Used internally (ie not written to *.fnm files) for pre-2.9 files
+ public static final int FORMAT_PRE = -1;
+
+ // First used in 2.9; prior to 2.9 there was no format header
+ public static final int FORMAT_START = -2;
+
+ // First used in 3.4: omit only positional information
+ public static final int FORMAT_OMIT_POSITIONS = -3;
+
+ // whenever you add a new format, make it 1 smaller (negative version logic)!
+ // write() always emits this value as the first VInt of the file.
+ static final int CURRENT_FORMAT = FORMAT_OMIT_POSITIONS;
+
+ // Bit flags packed into the single byte that write() emits after each field
+ // name and that read() decodes again.
+ static final byte IS_INDEXED = 0x1;
+ static final byte STORE_TERMVECTOR = 0x2;
+ static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
+ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
+ static final byte OMIT_NORMS = 0x10;
+ static final byte STORE_PAYLOADS = 0x20;
+ // Set for IndexOptions.DOCS_ONLY
+ static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
+ // Set for IndexOptions.DOCS_AND_FREQS. This is the high bit (0x80) of the
+ // byte; it is spelled -128 because 0x80 does not fit a signed byte constant.
+ static final byte OMIT_POSITIONS = -128;
+
+ // Fields in number order: addInternal() gives each new FieldInfo the current
+ // list size as its number and appends it here.
+ private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
+ // The same FieldInfo instances, keyed by field name.
+ private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
+ // Format determined by the most recent read(); stays 0 if read() never ran.
+ private int format;
+
+ /** Creates an empty instance that holds no fields. */
+ FieldInfos() { }
+
+ /**
+ * Construct a FieldInfos object by reading the named file from the given
+ * directory. The IndexInput opened for the file is closed in a finally
+ * block, whether or not reading succeeds.
+ * @param d The directory to open the IndexInput from
+ * @param name The name of the file to open the IndexInput from in the Directory
+ * @throws IOException if opening or reading the file fails; if the pre-utf8
+ * retry below fails as well, the IOException of the first attempt is thrown
+ */
+ FieldInfos(Directory d, String name) throws IOException {
+ IndexInput input = d.openInput(name);
+ try {
+ try {
+ read(input, name);
+ } catch (IOException ioe) {
+ // format holds whatever read() assigned before it failed
+ if (format == FORMAT_PRE) {
+ // LUCENE-1623: FORMAT_PRE (before there was a
+ // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
+ // encoding; retry with input set to pre-utf8
+ input.seek(0);
+ input.setModifiedUTF8StringsMode();
+ // discard any fields the failed first pass already registered
+ byNumber.clear();
+ byName.clear();
+ try {
+ read(input, name);
+ } catch (Throwable t) {
+ // Ignore any new exception & throw original IOE
+ throw ioe;
+ }
+ } else {
+ // The IOException cannot be caused by
+ // LUCENE-1623, so re-throw it
+ throw ioe;
+ }
+ }
+ } finally {
+ input.close();
+ }
+ }
+
+ /**
+  * Returns a deep clone of this FieldInfos instance: every FieldInfo is
+  * cloned and registered in the copy in the same order and under its own
+  * name. The format field is not carried over to the copy.
+  */
+ @Override
+ public synchronized Object clone() {
+   final FieldInfos copy = new FieldInfos();
+   for (FieldInfo original : byNumber) {
+     final FieldInfo duplicate = (FieldInfo) original.clone();
+     copy.byNumber.add(duplicate);
+     copy.byName.put(duplicate.name, duplicate);
+   }
+   return copy;
+ }
+
+ /**
+  * Adds field info for every field of the given Document. Payload storage
+  * is always passed as false; all other settings are taken from the field.
+  */
+ public synchronized void add(Document doc) {
+   for (Fieldable f : doc.getFields()) {
+     // Getters are evaluated in the same order as the parameters they feed.
+     final String fieldName = f.name();
+     final boolean indexed = f.isIndexed();
+     final boolean termVector = f.isTermVectorStored();
+     final boolean tvPositions = f.isStorePositionWithTermVector();
+     final boolean tvOffsets = f.isStoreOffsetWithTermVector();
+     final boolean noNorms = f.getOmitNorms();
+     final IndexOptions options = f.getIndexOptions();
+     add(fieldName, indexed, termVector, tvPositions, tvOffsets, noNorms, false, options);
+   }
+ }
+
+ /**
+  * Returns true if at least one indexed field uses
+  * IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.
+  */
+ boolean hasProx() {
+   for (int n = 0, count = byNumber.size(); n < count; n++) {
+     final FieldInfo info = fieldInfo(n);
+     if (!info.isIndexed) {
+       continue;
+     }
+     if (info.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Adds each of the given names as an indexed field, using the same term
+  * vector settings for all of them.
+  *
+  * @param names The names of the fields
+  * @param storeTermVectors Whether the fields store term vectors or not
+  * @param storePositionWithTermVector true if positions should be stored.
+  * @param storeOffsetWithTermVector true if offsets should be stored
+  */
+ public synchronized void addIndexed(Collection<String> names, boolean storeTermVectors,
+     boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+   final boolean indexed = true;
+   for (String fieldName : names) {
+     add(fieldName, indexed, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+   }
+ }
+
+ /**
+  * Adds each of the given names through {@link #add(String, boolean)}, so
+  * none of the fields stores term vectors.
+  *
+  * @param names The names of the fields
+  * @param isIndexed Whether the fields are indexed or not
+  *
+  * @see #add(String, boolean)
+  */
+ public synchronized void add(Collection<String> names, boolean isIndexed) {
+   for (String fieldName : names) {
+     add(fieldName, isIndexed);
+   }
+ }
+
+ /**
+ * Calls the 6 parameter add with false for all TermVector parameters and
+ * for omitNorms.
+ *
+ * @param name The name of the field
+ * @param isIndexed true if the field is indexed
+ * @see #add(String, boolean, boolean, boolean, boolean, boolean)
+ */
+ synchronized public void add(String name, boolean isIndexed) {
+ add(name, isIndexed, false, false, false, false);
+ }
+
+ /**
+ * Calls the 6 parameter add with false for term vector positions, term
+ * vector offsets and omitNorms.
+ *
+ * @param name The name of the field
+ * @param isIndexed true if the field is indexed
+ * @param storeTermVector true if the term vector should be stored
+ * @see #add(String, boolean, boolean, boolean, boolean, boolean)
+ */
+ synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
+ add(name, isIndexed, storeTermVector, false, false, false);
+ }
+
+ /** If the field is not yet known, adds it. If it is known, checks to make
+ * sure that the isIndexed flag is the same as was given previously for this
+ * field. If not - marks it as being indexed. Same goes for the TermVector
+ * parameters.
+ * <p>
+ * Delegates to the 6 parameter add with omitNorms set to false.
+ *
+ * @param name The name of the field
+ * @param isIndexed true if the field is indexed
+ * @param storeTermVector true if the term vector should be stored
+ * @param storePositionWithTermVector true if the term vector with positions should be stored
+ * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+ */
+ synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+
+ add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+ }
+
+ /** If the field is not yet known, adds it. If it is known, checks to make
+ * sure that the isIndexed flag is the same as was given previously for this
+ * field. If not - marks it as being indexed. Same goes for the TermVector
+ * parameters.
+ * <p>
+ * Delegates to the 8 parameter add with storePayloads set to false and
+ * IndexOptions.DOCS_AND_FREQS_AND_POSITIONS as the index options.
+ *
+ * @param name The name of the field
+ * @param isIndexed true if the field is indexed
+ * @param storeTermVector true if the term vector should be stored
+ * @param storePositionWithTermVector true if the term vector with positions should be stored
+ * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+ * @param omitNorms true if the norms for the indexed field should be omitted
+ */
+ synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+ boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
+ add(name, isIndexed, storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ }
+
+ /**
+  * Adds the field if its name is not yet known; otherwise passes the given
+  * settings to {@code FieldInfo.update} on the existing entry (how they are
+  * merged is defined by FieldInfo).
+  *
+  * @param name The name of the field
+  * @param isIndexed true if the field is indexed
+  * @param storeTermVector true if the term vector should be stored
+  * @param storePositionWithTermVector true if the term vector with positions should be stored
+  * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+  * @param omitNorms true if the norms for the indexed field should be omitted
+  * @param storePayloads true if payloads should be stored for this field
+  * @param indexOptions the IndexOptions requested for this field
+  * @return the FieldInfo now registered under the given name
+  */
+ public synchronized FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
+     boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
+     boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
+   final FieldInfo existing = fieldInfo(name);
+   if (existing == null) {
+     // Unknown name: register a brand new entry and hand it back directly.
+     return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector,
+         storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
+   }
+   existing.update(isIndexed, storeTermVector, storePositionWithTermVector,
+       storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
+   // Payloads are only valid when positions are indexed.
+   assert existing.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !existing.storePayloads;
+   return existing;
+ }
+
+ /**
+ * Adds or updates the entry named {@code fi.name}, passing the settings
+ * carried by {@code fi} to the 8 parameter add.
+ *
+ * @param fi the FieldInfo whose name and settings are used
+ * @return the FieldInfo returned by the 8 parameter add
+ */
+ synchronized public FieldInfo add(FieldInfo fi) {
+ return add(fi.name, fi.isIndexed, fi.storeTermVector,
+ fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
+ fi.omitNorms, fi.storePayloads,
+ fi.indexOptions);
+ }
+
+ private FieldInfo addInternal(String name, boolean isIndexed,
+ boolean storeTermVector, boolean storePositionWithTermVector,
+ boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
+ name = StringHelper.intern(name);
+ FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
+ storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
+ byNumber.add(fi);
+ byName.put(name, fi);
+ return fi;
+ }
+
+ /** Returns the number of the named field, or -1 if no such field is known. */
+ public int fieldNumber(String fieldName) {
+   final FieldInfo info = fieldInfo(fieldName);
+   if (info == null) {
+     return -1;
+   }
+   return info.number;
+ }
+
+ /** Returns the FieldInfo registered under the given name, or null if there is none. */
+ public FieldInfo fieldInfo(String fieldName) {
+ return byName.get(fieldName);
+ }
+
+ /**
+  * Return the fieldName identified by its number.
+  *
+  * @param fieldNumber the number of the field to look up
+  * @return the fieldName or an empty string when the field
+  * with the given number doesn't exist, i.e. the number is negative or
+  * not smaller than {@link #size()}.
+  */
+ public String fieldName(int fieldNumber) {
+   // Check both bounds here: a list lookup past the end would throw
+   // IndexOutOfBoundsException instead of yielding the documented "".
+   if (fieldNumber < 0 || fieldNumber >= byNumber.size()) {
+     return "";
+   }
+   final FieldInfo fi = byNumber.get(fieldNumber);
+   return (fi != null) ? fi.name : "";
+ }
+
+ /**
+  * Return the fieldinfo object referenced by the fieldNumber.
+  *
+  * @param fieldNumber the number of the field to look up
+  * @return the FieldInfo object or null when the given fieldNumber
+  * doesn't exist, i.e. is negative or not smaller than {@link #size()}.
+  */
+ public FieldInfo fieldInfo(int fieldNumber) {
+   // The list lookup throws for indices at or beyond its size, so the upper
+   // bound is checked as well to honour the documented null result.
+   if (fieldNumber < 0 || fieldNumber >= byNumber.size()) {
+     return null;
+   }
+   return byNumber.get(fieldNumber);
+ }
+
+ /** Returns the number of fields currently registered. */
+ public int size() {
+ return byNumber.size();
+ }
+
+ /** Returns true if at least one field stores term vectors. */
+ public boolean hasVectors() {
+   for (int n = 0; n < size(); n++) {
+     if (fieldInfo(n).storeTermVector) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+ * Creates the named file in the given directory and writes all field infos
+ * to it via {@link #write(IndexOutput)}. The output is closed in a finally
+ * block, whether or not writing succeeds.
+ *
+ * @param d The directory to create the output in
+ * @param name The name of the file to create
+ * @throws IOException if creating, writing or closing the output fails
+ */
+ public void write(Directory d, String name) throws IOException {
+ IndexOutput output = d.createOutput(name);
+ try {
+ write(output);
+ } finally {
+ output.close();
+ }
+ }
+
+ public void write(IndexOutput output) throws IOException {
+ output.writeVInt(CURRENT_FORMAT);
+ output.writeVInt(size());
+ for (int i = 0; i < size(); i++) {
+ FieldInfo fi = fieldInfo(i);
+ assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
+ byte bits = 0x0;
+ if (fi.isIndexed) bits |= IS_INDEXED;
+ if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
+ if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
+ if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+ if (fi.omitNorms) bits |= OMIT_NORMS;
+ if (fi.storePayloads) bits |= STORE_PAYLOADS;
+ if (fi.indexOptions == IndexOptions.DOCS_ONLY)
+ bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+ else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
+ bits |= OMIT_POSITIONS;
+
+ output.writeString(fi.name);
+ output.writeByte(bits);
+ }
+ }
+
+ /**
+ * Parses field infos from the given input and registers each field via
+ * addInternal. The first VInt is either a negative format header or, for
+ * FORMAT_PRE files that have no header, the non-negative field count.
+ * The input is not closed here.
+ *
+ * @param input the input to read from
+ * @param fileName used only in exception messages
+ * @throws CorruptIndexException if the format is not recognized, if
+ * OMIT_POSITIONS is set in a format that predates FORMAT_OMIT_POSITIONS, or
+ * if the input has bytes left after the last field
+ * @throws IOException if reading from the input fails
+ */
+ private void read(IndexInput input, String fileName) throws IOException {
+ int firstInt = input.readVInt();
+
+ if (firstInt < 0) {
+ // This is a real format
+ format = firstInt;
+ } else {
+ // No header: firstInt is already the field count (used as size below)
+ format = FORMAT_PRE;
+ }
+
+ if (format != FORMAT_PRE && format != FORMAT_START && format != FORMAT_OMIT_POSITIONS) {
+ throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
+ }
+
+ int size;
+ if (format == FORMAT_PRE) {
+ size = firstInt;
+ } else {
+ size = input.readVInt(); //read in the size
+ }
+
+ for (int i = 0; i < size; i++) {
+ String name = StringHelper.intern(input.readString());
+ // One byte of flag bits per field, the counterpart of what write() emits
+ byte bits = input.readByte();
+ boolean isIndexed = (bits & IS_INDEXED) != 0;
+ boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
+ boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+ boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
+ boolean omitNorms = (bits & OMIT_NORMS) != 0;
+ boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
+ final IndexOptions indexOptions;
+ // OMIT_TERM_FREQ_AND_POSITIONS takes precedence if both omit bits are set
+ if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+ indexOptions = IndexOptions.DOCS_ONLY;
+ } else if ((bits & OMIT_POSITIONS) != 0) {
+ // Formats are negative and shrink with each version, so <= means
+ // "FORMAT_OMIT_POSITIONS or newer"
+ if (format <= FORMAT_OMIT_POSITIONS) {
+ indexOptions = IndexOptions.DOCS_AND_FREQS;
+ } else {
+ throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
+ }
+ } else {
+ indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
+
+ // LUCENE-3027: past indices were able to write
+ // storePayloads=true when omitTFAP is also true,
+ // which is invalid. We correct that, here:
+ if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+ storePayloads = false;
+ }
+
+ addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
+ }
+
+ // Every byte of the file must have been consumed by now
+ if (input.getFilePointer() != input.length()) {
+ throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+ }
+ }
+
+}