package org.apache.lucene.index;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;
import java.util.Collection;
import java.util.Map;
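
/** Writes term vectors for the segment being built: the
 *  term vectors index (.tvx), documents (.tvd) and fields
 *  (.tvf) files. */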
final class TermVectorsTermsWriter extends TermsHashConsumer {

  final DocumentsWriter docWriter;
  PerDoc[] docFreeList = new PerDoc[1];
  int freeCount;
  int allocCount;
  IndexOutput tvx;
  IndexOutput tvd;
  IndexOutput tvf;
  int lastDocID;
  boolean hasVectors;

  public TermVectorsTermsWriter(DocumentsWriter docWriter) {
    this.docWriter = docWriter;
  }

  @Override
  public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
    return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
  }

  @Override
  synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
    if (tvx != null) {
      // At least one doc in this run had term vectors enabled
      fill(state.numDocs);
      IOUtils.close(tvx, tvf, tvd);
      tvx = tvd = tvf = null;
      assert state.segmentName != null;
      String idxName = IndexFileNames.segmentFileName(state.segmentName, IndexFileNames.VECTORS_INDEX_EXTENSION);
      // The index file holds a 4 byte header plus two longs (16 bytes) per doc:
      if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
        throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
      }

      lastDocID = 0;
      state.hasVectors = hasVectors;
      hasVectors = false;
    }

    for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
      for (final TermsHashConsumerPerField field : entry.getValue()) {
        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
        perField.termsHashPerField.reset();
        perField.shrinkHash();
      }

      TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
      perThread.termsHashPerThread.reset(true);
    }
  }
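
  /** Returns a pooled PerDoc, allocating a new one only
   *  when the free list is empty. */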
  synchronized PerDoc getPerDoc() {
    if (freeCount == 0) {
      allocCount++;
      if (allocCount > docFreeList.length) {
        // Grow our free list up front to make sure we have
        // enough space to recycle all outstanding PerDoc
        // instances
        assert allocCount == 1 + docFreeList.length;
        docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      }
      return new PerDoc();
    } else {
      return docFreeList[--freeCount];
    }
  }

  /** Fills in no-term-vectors for all docs we haven't seen
   *  since the last doc that had term vectors. */
  void fill(int docID) throws IOException {
    if (lastDocID < docID) {
      final long tvfPosition = tvf.getFilePointer();
      while (lastDocID < docID) {
        tvx.writeLong(tvd.getFilePointer());
        tvd.writeVInt(0);            // zero vector fields for this doc
        tvx.writeLong(tvfPosition);
        lastDocID++;
      }
    }
  }
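
  /** Lazily creates the three term vector outputs and writes
   *  their format headers, on the first document in the
   *  segment that has term vectors enabled. */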
  synchronized void initTermVectorsWriter() throws IOException {
    if (tvx == null) {
      boolean success = false;
      try {
        // If we hit an exception while init'ing the term
        // vector output files, we must abort this segment
        // because those files will be in an unknown
        // state:
        tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
        tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
        tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));

        tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
        tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
        }
      }

      lastDocID = 0;
    }
  }
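
  /** Appends one document's buffered term vectors to the
   *  real segment files, in docID order. */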
  synchronized void finishDocument(PerDoc perDoc) throws IOException {

    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");

    initTermVectorsWriter();

    fill(perDoc.docID);

    // Append term vectors to the real outputs:
    tvx.writeLong(tvd.getFilePointer());
    tvx.writeLong(tvf.getFilePointer());
    tvd.writeVInt(perDoc.numVectorFields);
    if (perDoc.numVectorFields > 0) {
      for (int i = 0; i < perDoc.numVectorFields; i++) {
        tvd.writeVInt(perDoc.fieldNumbers[i]);
      }
      // Field pointers are delta-coded against the previous field's position:
      assert 0 == perDoc.fieldPointers[0];
      long lastPos = perDoc.fieldPointers[0];
      for (int i = 1; i < perDoc.numVectorFields; i++) {
        long pos = perDoc.fieldPointers[i];
        tvd.writeVLong(pos - lastPos);
        lastPos = pos;
      }
      perDoc.perDocTvf.writeTo(tvf);
      perDoc.numVectorFields = 0;
    }

    assert lastDocID == perDoc.docID: "lastDocID=" + lastDocID + " perDoc.docID=" + perDoc.docID;

    lastDocID++;

    perDoc.reset();
    free(perDoc);
    assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
  }
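
  /** Closes and deletes any partially written term vector
   *  files; called when the segment is aborted. */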
  @Override
  public void abort() {
    hasVectors = false;
    try {
      IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
    } catch (IOException e) {
      // cannot happen since we suppress exceptions
      throw new RuntimeException(e);
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
    } catch (IOException ignored) {
    }

    try {
      docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));
    } catch (IOException ignored) {
    }

    tvx = tvd = tvf = null;
    lastDocID = 0;
  }

  synchronized void free(PerDoc doc) {
    assert freeCount < docFreeList.length;
    docFreeList[freeCount++] = doc;
  }
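
  /** Buffers one document's term vectors in RAM until
   *  finish() appends them to the segment files. */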
  class PerDoc extends DocumentsWriter.DocWriter {

    final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
    RAMOutputStream perDocTvf = new RAMOutputStream(buffer);

    int numVectorFields;

    int[] fieldNumbers = new int[1];
    long[] fieldPointers = new long[1];

    void reset() {
      perDocTvf.reset();
      buffer.recycle();
      numVectorFields = 0;
    }

    @Override
    void abort() {
      reset();
      free(this);
    }
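
    /** Records a field's number and its start offset within
     *  the per-doc tvf buffer. */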
    void addField(final int fieldNumber) {
      if (numVectorFields == fieldNumbers.length) {
        fieldNumbers = ArrayUtil.grow(fieldNumbers);
      }
      if (numVectorFields == fieldPointers.length) {
        fieldPointers = ArrayUtil.grow(fieldPointers);
      }
      fieldNumbers[numVectorFields] = fieldNumber;
      fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
      numVectorFields++;
    }

    @Override
    public long sizeInBytes() {
      return buffer.getSizeInBytes();
    }

    @Override
    public void finish() throws IOException {
      finishDocument(this);
    }
  }
}