lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/NormsWriter.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.Collection;
  22 import java.util.Iterator;
  23 import java.util.HashMap;
  24 import java.util.Map;
  25 import java.util.List;
  26 import java.util.ArrayList;
  27
  28 import org.apache.lucene.store.IndexOutput;
  29 import org.apache.lucene.search.Similarity;
  30 import org.apache.lucene.util.IOUtils;
  31
  32 // TODO FI: norms could actually be stored as doc store
  33
  34 /** Writes norms.  Each thread X field accumulates the norms
  35  *  for the doc/fields it saw, then the flush method below
  36  *  merges all of these together into a single _X.nrm file.
  37  */
  38
  39 final class NormsWriter extends InvertedDocEndConsumer {
  40
  41   private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
  42   private FieldInfos fieldInfos;
  43   @Override
  44   public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
  45     return new NormsWriterPerThread(docInverterPerThread, this);
  46   }
  47
  48   @Override
  49   public void abort() {}
  50
  51   // We only write the _X.nrm file at flush
  52   void files(Collection<String> files) {}
  53
  54   @Override
  55   void setFieldInfos(FieldInfos fieldInfos) {
  56     this.fieldInfos = fieldInfos;
  57   }
  58
  59   /** Produce _X.nrm if any document had a field with norms
  60    *  not disabled */
  61   @Override
  62   public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
  63
  64     final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
  65
  66     // Typically, each thread will have encountered the same
  67     // field.  So first we collate by field, ie, all
  68     // per-thread field instances that correspond to the
  69     // same FieldInfo
  70     for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
  71       final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
  72       final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();
  73
  74       while (fieldsIt.hasNext()) {
  75         final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
  76
  77         if (perField.upto > 0) {
  78           // It has some norms
  79           List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
  80           if (l == null) {
  81             l = new ArrayList<NormsWriterPerField>();
  82             byField.put(perField.fieldInfo, l);
  83           }
  84           l.add(perField);
  85         } else
  86           // Remove this field since we haven't seen it
  87           // since the previous flush
  88           fieldsIt.remove();
  89       }
  90     }
  91
  92     final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, IndexFileNames.NORMS_EXTENSION);
  93     IndexOutput normsOut = state.directory.createOutput(normsFileName);
  94     boolean success = false;
  95     try {
  96       normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length);
  97
  98       final int numField = fieldInfos.size();
  99
 100       int normCount = 0;
 101
 102       for(int fieldNumber=0;fieldNumber<numField;fieldNumber++) {
 103
 104         final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
 105
 106         List<NormsWriterPerField> toMerge = byField.get(fieldInfo);
 107         int upto = 0;
 108         if (toMerge != null) {
 109
 110           final int numFields = toMerge.size();
 111
 112           normCount++;
 113
 114           final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
 115           int[] uptos = new int[numFields];
 116
 117           for(int j=0;j<numFields;j++)
 118             fields[j] = toMerge.get(j);
 119
 120           int numLeft = numFields;
 121
 122           while(numLeft > 0) {
 123
 124             assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
 125
 126             int minLoc = 0;
 127             int minDocID = fields[0].docIDs[uptos[0]];
 128
 129             for(int j=1;j<numLeft;j++) {
 130               final int docID = fields[j].docIDs[uptos[j]];
 131               if (docID < minDocID) {
 132                 minDocID = docID;
 133                 minLoc = j;
 134               }
 135             }
 136
 137             assert minDocID < state.numDocs;
 138
 139             // Fill hole
 140             for(;upto<minDocID;upto++)
 141               normsOut.writeByte(defaultNorm);
 142
 143             normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
 144             (uptos[minLoc])++;
 145             upto++;
 146
 147             if (uptos[minLoc] == fields[minLoc].upto) {
 148               fields[minLoc].reset();
 149               if (minLoc != numLeft-1) {
 150                 fields[minLoc] = fields[numLeft-1];
 151                 uptos[minLoc] = uptos[numLeft-1];
 152               }
 153               numLeft--;
 154             }
 155           }
 156
 157           // Fill final hole with defaultNorm
 158           for(;upto<state.numDocs;upto++)
 159             normsOut.writeByte(defaultNorm);
 160         } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
 161           normCount++;
 162           // Fill entire field with default norm:
 163           for(;upto<state.numDocs;upto++)
 164             normsOut.writeByte(defaultNorm);
 165         }
 166
 167         assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();
 168       }
 169       success = true;
 170     } finally {
 171       if (success) {
 172         IOUtils.close(normsOut);
 173       } else {
 174         IOUtils.closeWhileHandlingException(normsOut);
 175       }
 176     }
 177   }
 178 }