package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.Comparator;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
 * Gathers all Fieldables for a document under the same
 * name, updates FieldInfos, and calls per-field consumers
 * to process field by field.
 *
 * Currently, only a single thread visits the fields,
 * sequentially, for processing.
 */
final class DocFieldProcessorPerThread extends DocConsumerPerThread {

  int fieldGen;
  final DocFieldProcessor docFieldProcessor;
  final FieldInfos fieldInfos;
  final DocFieldConsumerPerThread consumer;
  // Holds all fields seen in current doc
  DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
  int fieldCount;

  // Hash table for all fields ever seen; buckets are chained via
  // DocFieldProcessorPerField.next and the size stays a power of two
  DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
  int hashMask = 1;
  int totalFieldCount;
  final StoredFieldsWriterPerThread fieldsWriter;

  final DocumentsWriter.DocState docState;
  public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
    this.docState = threadState.docState;
    this.docFieldProcessor = docFieldProcessor;
    this.fieldInfos = docFieldProcessor.fieldInfos;
    this.consumer = docFieldProcessor.consumer.addThread(this);
    fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
  }

  @Override
  public void abort() {
    Throwable th = null;

    for (DocFieldProcessorPerField field : fieldHash) {
      while (field != null) {
        final DocFieldProcessorPerField next = field.next;
        try {
          field.abort();
        } catch (Throwable t) {
          // Remember only the first failure; keep aborting the rest
          if (th == null)
            th = t;
        }
        field = next;
      }
    }

    try {
      fieldsWriter.abort();
    } catch (Throwable t) {
      if (th == null)
        th = t;
    }

    try {
      consumer.abort();
    } catch (Throwable t) {
      if (th == null)
        th = t;
    }

    // If any errors occurred, throw the first one we hit.
    if (th != null) {
      if (th instanceof RuntimeException) throw (RuntimeException) th;
      if (th instanceof Error) throw (Error) th;
      // defensive code - we should not hit unchecked exceptions
      throw new RuntimeException(th);
    }
  }

  public Collection<DocFieldConsumerPerField> fields() {
    Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField field = fieldHash[i];
      while(field != null) {
        fields.add(field.consumer);
        field = field.next;
      }
    }
    assert fields.size() == totalFieldCount;
    return fields;
  }

  /** If there are fields we've seen but did not see again
   *  in the last run, then free them up. */
  void trimFields(SegmentWriteState state) {

    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField perField = fieldHash[i];
      DocFieldProcessorPerField lastPerField = null;

      while (perField != null) {

        if (perField.lastGen == -1) {

          // This field was not seen since the previous
          // flush, so, free up its resources now

          // Unhash it from the chain
          if (lastPerField == null)
            fieldHash[i] = perField.next;
          else
            lastPerField.next = perField.next;

          if (state.infoStream != null)
            state.infoStream.println("  purge field=" + perField.fieldInfo.name);

          totalFieldCount--;

        } else {
          // Still live: reset its generation for the next flush
          perField.lastGen = -1;
          lastPerField = perField;
        }

        perField = perField.next;
      }
    }
  }
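
  // Doubles the hash table and relinks the existing chains.  The
  // table size is always a power of two, so hashMask = size-1 and
  // (name.hashCode() & hashMask) picks a bucket without a modulo:
  // e.g. with size 4 the mask is 3 (binary 11), so a name hash of
  // 13 lands in bucket 13 & 3 = 1.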
  private void rehash() {
    final int newHashSize = (fieldHash.length*2);
    assert newHashSize > fieldHash.length;

    final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];

    int newHashMask = newHashSize-1;
    for(int j=0;j<fieldHash.length;j++) {
      DocFieldProcessorPerField fp0 = fieldHash[j];
      while(fp0 != null) {
        final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
        DocFieldProcessorPerField nextFP0 = fp0.next;
        fp0.next = newHashArray[hashPos2];
        newHashArray[hashPos2] = fp0;
        fp0 = nextFP0;
      }
    }

    fieldHash = newHashArray;
    hashMask = newHashMask;
  }
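
  // Processes one document: collects its Fieldables by name into
  // per-field buckets, folds any flag changes into FieldInfos,
  // writes stored fields, then hands each bucket to the consumer
  // chain.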
  @Override
  public DocumentsWriter.DocWriter processDocument() throws IOException {

    consumer.startDocument();
    fieldsWriter.startDocument();

    final Document doc = docState.doc;

    assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");

    fieldCount = 0;

    final int thisFieldGen = fieldGen++;
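    // fp.lastGen is compared against this generation stamp below so
    // that a field name appearing multiple times in one document (a
    // multi-valued field) is collected into the same bucket but only
    // added to fields[] once.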

    final List<Fieldable> docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;

      if (fp == null) {

        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getIndexOptions());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        // Grow when the table is half full to keep the chains short
        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getIndexOptions());
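
      // Note: update() merges the new flags into the existing
      // FieldInfo rather than replacing them; e.g. once any document
      // turns on term vectors for this field, they stay on.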

      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }

      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
      if (field.isStored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);

    for(int i=0;i<fieldCount;i++)
      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);

    if (docState.maxTermPrefix != null && docState.infoStream != null) {
      docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
      docState.maxTermPrefix = null;
    }

    final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
    final DocumentsWriter.DocWriter two = consumer.finishDocument();
    if (one == null) {
      return two;
    } else if (two == null) {
      return one;
    } else {
      PerDoc both = getPerDoc();
      both.docID = docState.docID;
      assert one.docID == docState.docID;
      assert two.docID == docState.docID;
      both.one = one;
      both.two = two;
      return both;
    }
  }

  private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
    public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
      return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
    }
  };
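
  // Recycling pool for PerDoc instances: getPerDoc() pops a free
  // instance (allocating when the pool is empty) and freePerDoc()
  // pushes it back, so steady-state indexing does not allocate a
  // new PerDoc per document.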
  PerDoc[] docFreeList = new PerDoc[1];
  int freeCount;
  int allocCount;

  synchronized PerDoc getPerDoc() {
    if (freeCount == 0) {
      allocCount++;
      if (allocCount > docFreeList.length) {
        // Grow our free list up front to make sure we have
        // enough space to recycle all outstanding PerDoc
        // instances
        assert allocCount == 1+docFreeList.length;
        docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      }
      return new PerDoc();
    } else
      return docFreeList[--freeCount];
  }

  synchronized void freePerDoc(PerDoc perDoc) {
    assert freeCount < docFreeList.length;
    docFreeList[freeCount++] = perDoc;
  }
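
  // Pairs the stored-fields writer's DocWriter with the consumer
  // chain's DocWriter so DocumentsWriter can finish or abort both
  // as a single unit.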
  class PerDoc extends DocumentsWriter.DocWriter {

    DocumentsWriter.DocWriter one;
    DocumentsWriter.DocWriter two;

    @Override
    public long sizeInBytes() {
      return one.sizeInBytes() + two.sizeInBytes();
    }

    @Override
    public void finish() throws IOException {
      try {
        try {
          one.finish();
        } finally {
          two.finish();
        }
      } finally {
        // Always return this instance to the pool
        freePerDoc(this);
      }
    }

    @Override
    public void abort() {
      try {
        try {
          one.abort();
        } finally {
          two.abort();
        }
      } finally {
        freePerDoc(this);
      }
    }
  }
}