--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Gathers all Fieldables for a document under the same
+ * field name, updates FieldInfos, and calls per-field
+ * consumers to process the fields, one field at a time.
+ *
+ * Currently the fields are visited sequentially, by a
+ * single thread.
+ */
+
+final class DocFieldProcessorPerThread extends DocConsumerPerThread {
+
+ float docBoost;
+ int fieldGen;
+ final DocFieldProcessor docFieldProcessor;
+ final FieldInfos fieldInfos;
+ final DocFieldConsumerPerThread consumer;
+
+ // Holds all fields seen in current doc
+ DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+ int fieldCount;
+
+ // Hash table for all fields ever seen
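+  // NOTE: the table length is always a power of two, so
+  // (name.hashCode() & hashMask) selects a bucket without
+  // needing a modulo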
+ DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+ int hashMask = 1;
+ int totalFieldCount;
+
+ final StoredFieldsWriterPerThread fieldsWriter;
+
+ final DocumentsWriter.DocState docState;
+
+ public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
+ this.docState = threadState.docState;
+ this.docFieldProcessor = docFieldProcessor;
+ this.fieldInfos = docFieldProcessor.fieldInfos;
+ this.consumer = docFieldProcessor.consumer.addThread(this);
+ fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
+ }
+
+ @Override
+ public void abort() {
+ Throwable th = null;
+
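+    // Abort every per-field consumer even if one of them throws;
+    // only the first exception is remembered and rethrown below.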
+ for (DocFieldProcessorPerField field : fieldHash) {
+ while (field != null) {
+ final DocFieldProcessorPerField next = field.next;
+ try {
+ field.abort();
+ } catch (Throwable t) {
+ if (th == null) {
+ th = t;
+ }
+ }
+ field = next;
+ }
+ }
+
+ try {
+ fieldsWriter.abort();
+ } catch (Throwable t) {
+ if (th == null) {
+ th = t;
+ }
+ }
+
+ try {
+ consumer.abort();
+ } catch (Throwable t) {
+ if (th == null) {
+ th = t;
+ }
+ }
+
+    // If any of the aborts threw an exception, rethrow the first one now.
+    if (th != null) {
+      if (th instanceof RuntimeException) throw (RuntimeException) th;
+      if (th instanceof Error) throw (Error) th;
+      // Defensive: abort() declares no checked exceptions, so we
+      // should never actually reach this wrapping path.
+      throw new RuntimeException(th);
+ }
+ }
+
+ public Collection<DocFieldConsumerPerField> fields() {
+ Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while(field != null) {
+ fields.add(field.consumer);
+ field = field.next;
+ }
+ }
+ assert fields.size() == totalFieldCount;
+ return fields;
+ }
+
+  /** Frees up the resources of any fields we have seen
+   *  before but that did not appear again in the last run. */
+
+ void trimFields(SegmentWriteState state) {
+
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField perField = fieldHash[i];
+ DocFieldProcessorPerField lastPerField = null;
+
+ while (perField != null) {
+
+ if (perField.lastGen == -1) {
+
+ // This field was not seen since the previous
+ // flush, so, free up its resources now
+
+ // Unhash
+ if (lastPerField == null)
+ fieldHash[i] = perField.next;
+ else
+ lastPerField.next = perField.next;
+
+ if (state.infoStream != null)
+ state.infoStream.println(" purge field=" + perField.fieldInfo.name);
+
+ totalFieldCount--;
+
+ } else {
+ // Reset
+ perField.lastGen = -1;
+ lastPerField = perField;
+ }
+
+ perField = perField.next;
+ }
+ }
+ }
+
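+  // Doubles the hash table, preserving the power-of-two size that
+  // hashMask relies on, and redistributes the existing entries
+  // across the new buckets.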
+ private void rehash() {
+ final int newHashSize = (fieldHash.length*2);
+ assert newHashSize > fieldHash.length;
+
+    final DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize];
+
+ // Rehash
+ int newHashMask = newHashSize-1;
+ for(int j=0;j<fieldHash.length;j++) {
+ DocFieldProcessorPerField fp0 = fieldHash[j];
+ while(fp0 != null) {
+ final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
+ DocFieldProcessorPerField nextFP0 = fp0.next;
+ fp0.next = newHashArray[hashPos2];
+ newHashArray[hashPos2] = fp0;
+ fp0 = nextFP0;
+ }
+ }
+
+ fieldHash = newHashArray;
+ hashMask = newHashMask;
+ }
+
+ @Override
+ public DocumentsWriter.DocWriter processDocument() throws IOException {
+
+ consumer.startDocument();
+ fieldsWriter.startDocument();
+
+ final Document doc = docState.doc;
+
+ assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");
+
+ fieldCount = 0;
+
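+    // Bump the generation for this document; comparing each
+    // field's lastGen to thisFieldGen below tells us whether we
+    // have already seen that field in this document, without
+    // having to clear any per-field state up front.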
+ final int thisFieldGen = fieldGen++;
+
+ final List<Fieldable> docFields = doc.getFields();
+ final int numDocFields = docFields.size();
+
+    // Absorb any new fields first seen in this document.
+    // Also absorb any changes to fields we had already
+    // seen before (e.g. suddenly turning on norms or
+    // vectors, etc.):
+
+ for(int i=0;i<numDocFields;i++) {
+ Fieldable field = docFields.get(i);
+ final String fieldName = field.name();
+
+ // Make sure we have a PerField allocated
+ final int hashPos = fieldName.hashCode() & hashMask;
+ DocFieldProcessorPerField fp = fieldHash[hashPos];
+ while(fp != null && !fp.fieldInfo.name.equals(fieldName))
+ fp = fp.next;
+
+ if (fp == null) {
+
+ // TODO FI: we need to genericize the "flags" that a
+ // field holds, and, how these flags are merged; it
+ // needs to be more "pluggable" such that if I want
+ // to have a new "thing" my Fields can do, I can
+ // easily add it
+ FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getIndexOptions());
+
+ fp = new DocFieldProcessorPerField(this, fi);
+ fp.next = fieldHash[hashPos];
+ fieldHash[hashPos] = fp;
+ totalFieldCount++;
+
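+        // Grow once the table is half full, so bucket chains stay short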
+ if (totalFieldCount >= fieldHash.length/2)
+ rehash();
+ } else {
+ fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getIndexOptions());
+ }
+
+ if (thisFieldGen != fp.lastGen) {
+
+ // First time we're seeing this field for this doc
+ fp.fieldCount = 0;
+
+ if (fieldCount == fields.length) {
+ final int newSize = fields.length*2;
+        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
+ System.arraycopy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = fp;
+ fp.lastGen = thisFieldGen;
+ }
+
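+      // Append this occurrence to the field's list, growing it as
+      // needed; the same field name may appear multiple times in
+      // one document.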
+ if (fp.fieldCount == fp.fields.length) {
+ Fieldable[] newArray = new Fieldable[fp.fields.length*2];
+ System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
+ fp.fields = newArray;
+ }
+
+ fp.fields[fp.fieldCount++] = field;
+ if (field.isStored()) {
+ fieldsWriter.addField(field, fp.fieldInfo);
+ }
+ }
+
+    // If we are writing vectors then we must visit
+    // fields in sorted order so they are written in
+    // sorted order.  TODO: we actually only need to
+    // sort the subset of fields that have vectors
+    // enabled; we could save a small amount of CPU
+    // here.
+ ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
+
+ for(int i=0;i<fieldCount;i++)
+ fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
+
+ if (docState.maxTermPrefix != null && docState.infoStream != null) {
+ docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+ docState.maxTermPrefix = null;
+ }
+
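+    // Both the stored fields writer and the consumer may return
+    // pending output for this document; if both do, wrap them in
+    // a single PerDoc so they are finished (or aborted) together.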
+ final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
+ final DocumentsWriter.DocWriter two = consumer.finishDocument();
+ if (one == null) {
+ return two;
+ } else if (two == null) {
+ return one;
+ } else {
+ PerDoc both = getPerDoc();
+ both.docID = docState.docID;
+ assert one.docID == docState.docID;
+ assert two.docID == docState.docID;
+ both.one = one;
+ both.two = two;
+ return both;
+ }
+ }
+
+ private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
+ public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
+ return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
+ }
+ };
+
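+  // Free list that recycles PerDoc instances, so we do not
+  // allocate a new PerDoc for every document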
+ PerDoc[] docFreeList = new PerDoc[1];
+ int freeCount;
+ int allocCount;
+
+ synchronized PerDoc getPerDoc() {
+ if (freeCount == 0) {
+ allocCount++;
+ if (allocCount > docFreeList.length) {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ assert allocCount == 1+docFreeList.length;
+ docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ }
+ return new PerDoc();
+    } else {
+      return docFreeList[--freeCount];
+    }
+ }
+
+ synchronized void freePerDoc(PerDoc perDoc) {
+ assert freeCount < docFreeList.length;
+ docFreeList[freeCount++] = perDoc;
+ }
+
+ class PerDoc extends DocumentsWriter.DocWriter {
+
+ DocumentsWriter.DocWriter one;
+ DocumentsWriter.DocWriter two;
+
+ @Override
+ public long sizeInBytes() {
+ return one.sizeInBytes() + two.sizeInBytes();
+ }
+
+ @Override
+ public void finish() throws IOException {
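+      // Nested try/finally: two.finish() must run even if
+      // one.finish() throws, and this PerDoc must always be
+      // returned to the free list.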
+ try {
+ try {
+ one.finish();
+ } finally {
+ two.finish();
+ }
+ } finally {
+ freePerDoc(this);
+ }
+ }
+
+ @Override
+ public void abort() {
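+      // Same pattern as finish(): abort both writers and always
+      // recycle this PerDoc.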
+ try {
+ try {
+ one.abort();
+ } finally {
+ two.abort();
+ }
+ } finally {
+ freePerDoc(this);
+ }
+ }
+ }
+}