package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.Comparator;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
 * Gathers all Fieldables for a document under the same
 * name, updates FieldInfos, and calls per-field consumers
 * to process field by field.
 *
 * Currently, only a single thread visits the fields,
 * sequentially, for processing.
 */
final class DocFieldProcessorPerThread extends DocConsumerPerThread {

  int fieldGen;
  final DocFieldProcessor docFieldProcessor;
  final FieldInfos fieldInfos;
  final DocFieldConsumerPerThread consumer;
  // Holds all fields seen in current doc
  DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
  int fieldCount;

  // Hash table for all fields ever seen; buckets are chained via
  // DocFieldProcessorPerField.next and the size stays a power of two
  DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
  int hashMask = 1;
  int totalFieldCount;
  final StoredFieldsWriterPerThread fieldsWriter;

  final DocumentsWriter.DocState docState;
  public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
    this.docState = threadState.docState;
    this.docFieldProcessor = docFieldProcessor;
    this.fieldInfos = docFieldProcessor.fieldInfos;
    this.consumer = docFieldProcessor.consumer.addThread(this);
    fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
  }

  @Override
  public void abort() {
    Throwable th = null;

    for (DocFieldProcessorPerField field : fieldHash) {
      while (field != null) {
        final DocFieldProcessorPerField next = field.next;
        try {
          field.abort();
        } catch (Throwable t) {
          // Remember only the first failure; keep aborting the rest
          if (th == null)
            th = t;
        }
        field = next;
      }
    }

    try {
      fieldsWriter.abort();
    } catch (Throwable t) {
      if (th == null)
        th = t;
    }

    try {
      consumer.abort();
    } catch (Throwable t) {
      if (th == null)
        th = t;
    }

    // If any errors occurred, throw the first one we hit.
    if (th != null) {
      if (th instanceof RuntimeException) throw (RuntimeException) th;
      if (th instanceof Error) throw (Error) th;
      // defensive code - we should not hit unchecked exceptions
      throw new RuntimeException(th);
    }
  }

  public Collection<DocFieldConsumerPerField> fields() {
    Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField field = fieldHash[i];
      while(field != null) {
        fields.add(field.consumer);
        field = field.next;
      }
    }
    assert fields.size() == totalFieldCount;
    return fields;
  }

  /** If there are fields we've seen but did not see again
   *  in the last run, then free them up. */
  void trimFields(SegmentWriteState state) {

    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField perField = fieldHash[i];
      DocFieldProcessorPerField lastPerField = null;

      while (perField != null) {

        if (perField.lastGen == -1) {

          // This field was not seen since the previous
          // flush, so, free up its resources now

          // Unhash it from the chain
          if (lastPerField == null)
            fieldHash[i] = perField.next;
          else
            lastPerField.next = perField.next;

          if (state.infoStream != null)
            state.infoStream.println("  purge field=" + perField.fieldInfo.name);

          totalFieldCount--;

        } else {
          // Still live: reset its generation for the next flush
          perField.lastGen = -1;
          lastPerField = perField;
        }

        perField = perField.next;
      }
    }
  }
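
  // Doubles the hash table and relinks the existing chains.  The
  // table size is always a power of two, so hashMask = size-1 and
  // (name.hashCode() & hashMask) picks a bucket without a modulo:
  // e.g. with size 4 the mask is 3 (binary 11), so a name hash of
  // 13 lands in bucket 13 & 3 = 1.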
  private void rehash() {
    final int newHashSize = (fieldHash.length*2);
    assert newHashSize > fieldHash.length;

    final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];

    int newHashMask = newHashSize-1;
    for(int j=0;j<fieldHash.length;j++) {
      DocFieldProcessorPerField fp0 = fieldHash[j];
      while(fp0 != null) {
        final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
        DocFieldProcessorPerField nextFP0 = fp0.next;
        fp0.next = newHashArray[hashPos2];
        newHashArray[hashPos2] = fp0;
        fp0 = nextFP0;
      }
    }

    fieldHash = newHashArray;
    hashMask = newHashMask;
  }
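
  // Processes one document: collects its Fieldables by name into
  // per-field buckets, folds any flag changes into FieldInfos,
  // writes stored fields, then hands each bucket to the consumer
  // chain.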
  @Override
  public DocumentsWriter.DocWriter processDocument() throws IOException {

    consumer.startDocument();
    fieldsWriter.startDocument();

    final Document doc = docState.doc;

    assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");

    fieldCount = 0;

    final int thisFieldGen = fieldGen++;
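    // fp.lastGen is compared against this generation stamp below so
    // that a field name appearing multiple times in one document (a
    // multi-valued field) is collected into the same bucket but only
    // added to fields[] once.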

    final List<Fieldable> docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;

      if (fp == null) {

        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getIndexOptions());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        // Grow when the table is half full to keep the chains short
        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getIndexOptions());
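
      // Note: update() merges the new flags into the existing
      // FieldInfo rather than replacing them; e.g. once any document
      // turns on term vectors for this field, they stay on.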

      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }

      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
      if (field.isStored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);

    for(int i=0;i<fieldCount;i++)
      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);

    if (docState.maxTermPrefix != null && docState.infoStream != null) {
      docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
      docState.maxTermPrefix = null;
    }

    final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
    final DocumentsWriter.DocWriter two = consumer.finishDocument();
    if (one == null) {
      return two;
    } else if (two == null) {
      return one;
    } else {
      PerDoc both = getPerDoc();
      both.docID = docState.docID;
      assert one.docID == docState.docID;
      assert two.docID == docState.docID;
      both.one = one;
      both.two = two;
      return both;
    }
  }

  private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
    public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
      return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
    }
  };
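
  // Recycling pool for PerDoc instances: getPerDoc() pops a free
  // instance (allocating when the pool is empty) and freePerDoc()
  // pushes it back, so steady-state indexing does not allocate a
  // new PerDoc per document.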
  PerDoc[] docFreeList = new PerDoc[1];
  int freeCount;
  int allocCount;

  synchronized PerDoc getPerDoc() {
    if (freeCount == 0) {
      allocCount++;
      if (allocCount > docFreeList.length) {
        // Grow our free list up front to make sure we have
        // enough space to recycle all outstanding PerDoc
        // instances
        assert allocCount == 1+docFreeList.length;
        docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      }
      return new PerDoc();
    } else
      return docFreeList[--freeCount];
  }

  synchronized void freePerDoc(PerDoc perDoc) {
    assert freeCount < docFreeList.length;
    docFreeList[freeCount++] = perDoc;
  }
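
  // Pairs the stored-fields writer's DocWriter with the consumer
  // chain's DocWriter so DocumentsWriter can finish or abort both
  // as a single unit.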
  class PerDoc extends DocumentsWriter.DocWriter {

    DocumentsWriter.DocWriter one;
    DocumentsWriter.DocWriter two;

    @Override
    public long sizeInBytes() {
      return one.sizeInBytes() + two.sizeInBytes();
    }

    @Override
    public void finish() throws IOException {
      try {
        try {
          one.finish();
        } finally {
          two.finish();
        }
      } finally {
        // Always return this instance to the pool
        freePerDoc(this);
      }
    }

    @Override
    public void abort() {
      try {
        try {
          one.abort();
        } finally {
          two.abort();
        }
      } finally {
        freePerDoc(this);
      }
    }
  }
}