--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+
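+/** Writes term vectors for each document into the in-progress
+ *  segment's tvx (index), tvd (documents) and tvf (fields) files.
+ *  Each document's vectors are first buffered in RAM (see {@link PerDoc})
+ *  and then appended to the real outputs, in docID order, when the
+ *  document is finished. */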
+final class TermVectorsTermsWriter extends TermsHashConsumer {
+
+ final DocumentsWriter docWriter;
+
+ // Free list of recycled PerDoc instances
+ PerDoc[] docFreeList = new PerDoc[1];
+ int freeCount;
+
+ // Outputs for the current segment's term vector files:
+ // tvx (index), tvd (documents) and tvf (fields)
+ IndexOutput tvx;
+ IndexOutput tvd;
+ IndexOutput tvf;
+
+ int lastDocID;      // next docID expected by the term vector files
+ boolean hasVectors; // true if any doc in the current segment had term vectors
+
+ public TermVectorsTermsWriter(DocumentsWriter docWriter) {
+ this.docWriter = docWriter;
+ }
+
+ @Override
+ public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
+ return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
+ }
+
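+ /** Called when a segment flushes: back-fills any trailing docs
+  *  without vectors, closes the tvx/tvd/tvf outputs, sanity-checks
+  *  that tvx holds its 4 byte header plus 16 bytes (two longs) per
+  *  doc, and resets the per-thread and per-field state. */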
+ @Override
+ synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
+ if (tvx != null) {
+ // At least one doc in this run had term vectors enabled
+ fill(state.numDocs);
+ IOUtils.close(tvx, tvf, tvd);
+ tvx = tvd = tvf = null;
+ assert state.segmentName != null;
+ String idxName = IndexFileNames.segmentFileName(state.segmentName, IndexFileNames.VECTORS_INDEX_EXTENSION);
+ if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
+ throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " bytes in " + idxName + "; file exists?=" + state.directory.fileExists(idxName));
+ }
+
+ lastDocID = 0;
+ state.hasVectors = hasVectors;
+ hasVectors = false;
+ }
+
+ for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
+ for (final TermsHashConsumerPerField field : entry.getValue()) {
+ TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+ perField.termsHashPerField.reset();
+ perField.shrinkHash();
+ }
+
+ TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
+ perThread.termsHashPerThread.reset(true);
+ }
+ }
+
+ int allocCount;
+
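+ /** Returns a recycled PerDoc if one is free, else allocates a new
+  *  one, growing the free list so the instance can be recycled later. */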
+ synchronized PerDoc getPerDoc() {
+ if (freeCount == 0) {
+ allocCount++;
+ if (allocCount > docFreeList.length) {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ assert allocCount == 1+docFreeList.length;
+ docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ }
+ return new PerDoc();
+ } else {
+ return docFreeList[--freeCount];
+ }
+ }
+
+ /** Fills in no-term-vectors for all docs we haven't seen
+ * since the last doc that had term vectors. */
+ void fill(int docID) throws IOException {
+ if (lastDocID < docID) {
+ final long tvfPosition = tvf.getFilePointer();
+ while(lastDocID < docID) {
+ tvx.writeLong(tvd.getFilePointer());
+ tvd.writeVInt(0);
+ tvx.writeLong(tvfPosition);
+ lastDocID++;
+ }
+ }
+ }
+
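+ /** Lazily creates the segment's tvx/tvd/tvf outputs and writes
+  *  their format headers; called for the first document in the
+  *  segment that actually has term vectors. */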
+ synchronized void initTermVectorsWriter() throws IOException {
+ if (tvx == null) {
+ boolean success = false;
+ try {
+ // If we hit an exception while init'ing the term
+ // vector output files, we must abort this segment
+ // because those files will be in an unknown
+ // state:
+ hasVectors = true;
+ tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
+ tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+ tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));
+
+ tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
+ tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
+ tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
+ }
+ }
+ lastDocID = 0;
+ }
+ }
+
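+ /** Called once per document with term vectors: back-fills any
+  *  preceding docs without vectors, appends this doc's pointers to
+  *  tvx/tvd, copies the buffered tvf bytes, and recycles the PerDoc. */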
+ synchronized void finishDocument(PerDoc perDoc) throws IOException {
+
+ assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
+
+ initTermVectorsWriter();
+
+ fill(perDoc.docID);
+
+ // Append term vectors to the real outputs:
+ tvx.writeLong(tvd.getFilePointer());
+ tvx.writeLong(tvf.getFilePointer());
+ tvd.writeVInt(perDoc.numVectorFields);
+ if (perDoc.numVectorFields > 0) {
+ for(int i=0;i<perDoc.numVectorFields;i++) {
+ tvd.writeVInt(perDoc.fieldNumbers[i]);
+ }
+ assert 0 == perDoc.fieldPointers[0];
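+ // tvd stores each field's position in tvf as a delta from the previous field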
+ long lastPos = perDoc.fieldPointers[0];
+ for(int i=1;i<perDoc.numVectorFields;i++) {
+ long pos = perDoc.fieldPointers[i];
+ tvd.writeVLong(pos-lastPos);
+ lastPos = pos;
+ }
+ perDoc.perDocTvf.writeTo(tvf);
+ perDoc.numVectorFields = 0;
+ }
+
+ assert lastDocID == perDoc.docID: "lastDocID=" + lastDocID + " perDoc.docID=" + perDoc.docID;
+
+ lastDocID++;
+
+ perDoc.reset();
+ free(perDoc);
+ assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
+ }
+
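+ /** Aborts the current segment's term vectors: closes any open
+  *  outputs, suppressing exceptions, and deletes the partially
+  *  written tvx/tvd/tvf files. */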
+ @Override
+ public void abort() {
+ hasVectors = false;
+ try {
+ IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
+ } catch (IOException e) {
+ // cannot happen since we suppress exceptions
+ throw new RuntimeException(e);
+ }
+
+ try {
+ docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_INDEX_EXTENSION));
+ } catch (IOException ignored) {
+ }
+
+ try {
+ docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
+ } catch (IOException ignored) {
+ }
+
+ try {
+ docWriter.directory.deleteFile(IndexFileNames.segmentFileName(docWriter.getSegment(), IndexFileNames.VECTORS_FIELDS_EXTENSION));
+ } catch (IOException ignored) {
+ }
+
+ tvx = tvd = tvf = null;
+ lastDocID = 0;
+ }
+
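+ /** Returns a PerDoc to the free list so getPerDoc() can reuse it. */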
+ synchronized void free(PerDoc doc) {
+ assert freeCount < docFreeList.length;
+ docFreeList[freeCount++] = doc;
+ }
+
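+ /** Buffers a single document's term vectors (field numbers, tvf
+  *  pointers and serialized tvf bytes) until the document is finished
+  *  and can be appended to the real outputs in docID order. */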
+ class PerDoc extends DocumentsWriter.DocWriter {
+
+ final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
+ RAMOutputStream perDocTvf = new RAMOutputStream(buffer);
+
+ int numVectorFields;
+
+ int[] fieldNumbers = new int[1];
+ long[] fieldPointers = new long[1];
+
+ void reset() {
+ perDocTvf.reset();
+ buffer.recycle();
+ numVectorFields = 0;
+ }
+
+ @Override
+ void abort() {
+ reset();
+ free(this);
+ }
+
+ void addField(final int fieldNumber) {
+ if (numVectorFields == fieldNumbers.length) {
+ fieldNumbers = ArrayUtil.grow(fieldNumbers);
+ }
+ if (numVectorFields == fieldPointers.length) {
+ fieldPointers = ArrayUtil.grow(fieldPointers);
+ }
+ fieldNumbers[numVectorFields] = fieldNumber;
+ fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
+ numVectorFields++;
+ }
+
+ @Override
+ public long sizeInBytes() {
+ return buffer.getSizeInBytes();
+ }
+
+ @Override
+ public void finish() throws IOException {
+ finishDocument(this);
+ }
+ }
+}