1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
/** Access to the Field Info file that describes document fields and whether or
 *  not they are indexed. Each segment has a separate Field Info file. Objects
 *  of this class are thread-safe for multiple readers, but only one thread can
 *  be adding documents at a time, with no other reader or writer threads
 *  accessing this object.
 */
37 final class FieldInfos {
39 // Used internally (ie not written to *.fnm files) for pre-2.9 files
40 public static final int FORMAT_PRE = -1;
42 // First used in 2.9; prior to 2.9 there was no format header
43 public static final int FORMAT_START = -2;
45 // First used in 3.4: omit only positional information
46 public static final int FORMAT_OMIT_POSITIONS = -3;
48 // whenever you add a new format, make it 1 smaller (negative version logic)!
49 static final int CURRENT_FORMAT = FORMAT_OMIT_POSITIONS;
51 static final byte IS_INDEXED = 0x1;
52 static final byte STORE_TERMVECTOR = 0x2;
53 static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
54 static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
55 static final byte OMIT_NORMS = 0x10;
56 static final byte STORE_PAYLOADS = 0x20;
57 static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
58 static final byte OMIT_POSITIONS = -128;
60 private final ArrayList<FieldInfo> byNumber = new ArrayList<FieldInfo>();
61 private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
67 * Construct a FieldInfos object using the directory and the name of the file
69 * @param d The directory to open the IndexInput from
70 * @param name The name of the file to open the IndexInput from in the Directory
73 FieldInfos(Directory d, String name) throws IOException {
74 IndexInput input = d.openInput(name);
78 } catch (IOException ioe) {
79 if (format == FORMAT_PRE) {
80 // LUCENE-1623: FORMAT_PRE (before there was a
81 // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8)
82 // encoding; retry with input set to pre-utf8
84 input.setModifiedUTF8StringsMode();
89 } catch (Throwable t) {
90 // Ignore any new exception & throw original IOE
94 // The IOException cannot be caused by
95 // LUCENE-1623, so re-throw it
105 * Returns a deep clone of this FieldInfos instance.
108 synchronized public Object clone() {
109 FieldInfos fis = new FieldInfos();
110 final int numField = byNumber.size();
111 for(int i=0;i<numField;i++) {
112 FieldInfo fi = (FieldInfo) ( byNumber.get(i)).clone();
113 fis.byNumber.add(fi);
114 fis.byName.put(fi.name, fi);
119 /** Adds field info for a Document. */
120 synchronized public void add(Document doc) {
121 List<Fieldable> fields = doc.getFields();
122 for (Fieldable field : fields) {
123 add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
124 field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions());
128 /** Returns true if any fields do not omitTermFreqAndPositions */
130 final int numFields = byNumber.size();
131 for(int i=0;i<numFields;i++) {
132 final FieldInfo fi = fieldInfo(i);
133 if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
141 * Add fields that are indexed. Whether they have termvectors has to be specified.
143 * @param names The names of the fields
144 * @param storeTermVectors Whether the fields store term vectors or not
145 * @param storePositionWithTermVector true if positions should be stored.
146 * @param storeOffsetWithTermVector true if offsets should be stored
148 synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector,
149 boolean storeOffsetWithTermVector) {
150 for (String name : names) {
151 add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
156 * Assumes the fields are not storing term vectors.
158 * @param names The names of the fields
159 * @param isIndexed Whether the fields are indexed or not
161 * @see #add(String, boolean)
163 synchronized public void add(Collection<String> names, boolean isIndexed) {
164 for (String name : names) {
165 add(name, isIndexed);
170 * Calls 5 parameter add with false for all TermVector parameters.
172 * @param name The name of the Fieldable
173 * @param isIndexed true if the field is indexed
174 * @see #add(String, boolean, boolean, boolean, boolean)
176 synchronized public void add(String name, boolean isIndexed) {
177 add(name, isIndexed, false, false, false, false);
181 * Calls 5 parameter add with false for term vector positions and offsets.
183 * @param name The name of the field
184 * @param isIndexed true if the field is indexed
185 * @param storeTermVector true if the term vector should be stored
187 synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
188 add(name, isIndexed, storeTermVector, false, false, false);
191 /** If the field is not yet known, adds it. If it is known, checks to make
192 * sure that the isIndexed flag is the same as was given previously for this
193 * field. If not - marks it as being indexed. Same goes for the TermVector
196 * @param name The name of the field
197 * @param isIndexed true if the field is indexed
198 * @param storeTermVector true if the term vector should be stored
199 * @param storePositionWithTermVector true if the term vector with positions should be stored
200 * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
202 synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
203 boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
205 add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
208 /** If the field is not yet known, adds it. If it is known, checks to make
209 * sure that the isIndexed flag is the same as was given previously for this
210 * field. If not - marks it as being indexed. Same goes for the TermVector
213 * @param name The name of the field
214 * @param isIndexed true if the field is indexed
215 * @param storeTermVector true if the term vector should be stored
216 * @param storePositionWithTermVector true if the term vector with positions should be stored
217 * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
218 * @param omitNorms true if the norms for the indexed field should be omitted
220 synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
221 boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
222 add(name, isIndexed, storeTermVector, storePositionWithTermVector,
223 storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
226 /** If the field is not yet known, adds it. If it is known, checks to make
227 * sure that the isIndexed flag is the same as was given previously for this
228 * field. If not - marks it as being indexed. Same goes for the TermVector
231 * @param name The name of the field
232 * @param isIndexed true if the field is indexed
233 * @param storeTermVector true if the term vector should be stored
234 * @param storePositionWithTermVector true if the term vector with positions should be stored
235 * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
236 * @param omitNorms true if the norms for the indexed field should be omitted
237 * @param storePayloads true if payloads should be stored for this field
238 * @param indexOptions if term freqs should be omitted for this field
240 synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
241 boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
242 boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
243 FieldInfo fi = fieldInfo(name);
245 return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
247 fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
249 assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
253 synchronized public FieldInfo add(FieldInfo fi) {
254 return add(fi.name, fi.isIndexed, fi.storeTermVector,
255 fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
256 fi.omitNorms, fi.storePayloads,
260 private FieldInfo addInternal(String name, boolean isIndexed,
261 boolean storeTermVector, boolean storePositionWithTermVector,
262 boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
263 name = StringHelper.intern(name);
264 FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
265 storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
267 byName.put(name, fi);
271 public int fieldNumber(String fieldName) {
272 FieldInfo fi = fieldInfo(fieldName);
273 return (fi != null) ? fi.number : -1;
276 public FieldInfo fieldInfo(String fieldName) {
277 return byName.get(fieldName);
281 * Return the fieldName identified by its number.
284 * @return the fieldName or an empty string when the field
285 * with the given number doesn't exist.
287 public String fieldName(int fieldNumber) {
288 FieldInfo fi = fieldInfo(fieldNumber);
289 return (fi != null) ? fi.name : "";
293 * Return the fieldinfo object referenced by the fieldNumber.
295 * @return the FieldInfo object or null when the given fieldNumber
298 public FieldInfo fieldInfo(int fieldNumber) {
299 return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null;
303 return byNumber.size();
306 public boolean hasVectors() {
307 boolean hasVectors = false;
308 for (int i = 0; i < size(); i++) {
309 if (fieldInfo(i).storeTermVector) {
317 public void write(Directory d, String name) throws IOException {
318 IndexOutput output = d.createOutput(name);
326 public void write(IndexOutput output) throws IOException {
327 output.writeVInt(CURRENT_FORMAT);
328 output.writeVInt(size());
329 for (int i = 0; i < size(); i++) {
330 FieldInfo fi = fieldInfo(i);
331 assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
333 if (fi.isIndexed) bits |= IS_INDEXED;
334 if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
335 if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
336 if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
337 if (fi.omitNorms) bits |= OMIT_NORMS;
338 if (fi.storePayloads) bits |= STORE_PAYLOADS;
339 if (fi.indexOptions == IndexOptions.DOCS_ONLY)
340 bits |= OMIT_TERM_FREQ_AND_POSITIONS;
341 else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
342 bits |= OMIT_POSITIONS;
344 output.writeString(fi.name);
345 output.writeByte(bits);
349 private void read(IndexInput input, String fileName) throws IOException {
350 int firstInt = input.readVInt();
353 // This is a real format
359 if (format != FORMAT_PRE && format != FORMAT_START && format != FORMAT_OMIT_POSITIONS) {
360 throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
364 if (format == FORMAT_PRE) {
367 size = input.readVInt(); //read in the size
370 for (int i = 0; i < size; i++) {
371 String name = StringHelper.intern(input.readString());
372 byte bits = input.readByte();
373 boolean isIndexed = (bits & IS_INDEXED) != 0;
374 boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
375 boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
376 boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
377 boolean omitNorms = (bits & OMIT_NORMS) != 0;
378 boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
379 final IndexOptions indexOptions;
380 if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
381 indexOptions = IndexOptions.DOCS_ONLY;
382 } else if ((bits & OMIT_POSITIONS) != 0) {
383 if (format <= FORMAT_OMIT_POSITIONS) {
384 indexOptions = IndexOptions.DOCS_AND_FREQS;
386 throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format);
389 indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
392 // LUCENE-3027: past indices were able to write
393 // storePayloads=true when omitTFAP is also true,
394 // which is invalid. We correct that, here:
395 if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
396 storePayloads = false;
399 addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions);
402 if (input.getFilePointer() != input.length()) {
403 throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length());