--- /dev/null
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.UnicodeUtil;
+
+import java.io.IOException;
+
+/**
+ * Writes per-document term vectors for one segment into three outputs:
+ * <ul>
+ *   <li><code>tvx</code>: for every document, two longs holding the file pointers
+ *       into <code>tvd</code> and <code>tvf</code> at the time the document was added;</li>
+ *   <li><code>tvd</code>: for every document, the field count, the field numbers and
+ *       the deltas between consecutive per-field pointers into <code>tvf</code>;</li>
+ *   <li><code>tvf</code>: for every field, the term count, a flags byte, and the
+ *       prefix-compressed terms with frequencies and optional positions/offsets.</li>
+ * </ul>
+ * Each output starts with an int format header written by the constructor.
+ */
+final class TermVectorsWriter {
+
+  private IndexOutput tvx = null;
+  private IndexOutput tvd = null;
+  private IndexOutput tvf = null;
+  private FieldInfos fieldInfos;
+
+  // Two scratch buffers used alternately: one holds the bytes of the term being
+  // written, the other the bytes of the term written just before it.
+  final UnicodeUtil.UTF8Result[] utf8Results = new UnicodeUtil.UTF8Result[] {
+      new UnicodeUtil.UTF8Result(),
+      new UnicodeUtil.UTF8Result()};
+
+  /**
+   * Creates the three term vector outputs of a segment and writes the current
+   * format header to each of them. If anything fails part way through, the
+   * outputs opened so far are closed (suppressing secondary exceptions) before
+   * the original failure propagates.
+   *
+   * @param directory  where the outputs are created
+   * @param segment    segment name used to derive the file names
+   * @param fieldInfos used to map field names to field numbers
+   * @throws IOException if an output cannot be created or written
+   */
+  public TermVectorsWriter(Directory directory, String segment,
+                           FieldInfos fieldInfos) throws IOException {
+    this.fieldInfos = fieldInfos;
+
+    boolean opened = false;
+    try {
+      final String indexName =
+          IndexFileNames.segmentFileName(segment, IndexFileNames.VECTORS_INDEX_EXTENSION);
+      tvx = directory.createOutput(indexName);
+      tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
+
+      final String documentsName =
+          IndexFileNames.segmentFileName(segment, IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+      tvd = directory.createOutput(documentsName);
+      tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
+
+      final String fieldsName =
+          IndexFileNames.segmentFileName(segment, IndexFileNames.VECTORS_FIELDS_EXTENSION);
+      tvf = directory.createOutput(fieldsName);
+      tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
+
+      opened = true;
+    } finally {
+      if (!opened) {
+        // Outputs are assigned to the fields before their header is written,
+        // so every output created so far is released here.
+        IOUtils.closeWhileHandlingException(tvx, tvd, tvf);
+      }
+    }
+  }
+
+  /**
+   * Adds one complete document, described by the term vectors of all its fields.
+   * An index entry is always written to tvx; a document without term vectors
+   * additionally gets a field count of zero in tvd.
+   *
+   * @param vectors one vector per field, or <code>null</code> if the document
+   *                has no term vectors
+   * @throws IOException if writing to any of the outputs fails
+   * @throws IllegalStateException if a vector that reported positions (or offsets)
+   *         for its first term returns <code>null</code> for a later term
+   */
+  public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
+    // Index entry: where this document starts in tvd and in tvf.
+    tvx.writeLong(tvd.getFilePointer());
+    tvx.writeLong(tvf.getFilePointer());
+
+    if (vectors == null) {
+      tvd.writeVInt(0);
+      return;
+    }
+
+    final int numFields = vectors.length;
+    tvd.writeVInt(numFields);
+
+    // 1st pass: field numbers go to tvd, field data goes to tvf; remember
+    // where each field starts in tvf.
+    final long[] fieldPointers = new long[numFields];
+    for (int field = 0; field < numFields; field++) {
+      fieldPointers[field] = tvf.getFilePointer();
+      writeField(vectors[field]);
+    }
+
+    // 2nd pass: field pointers go to tvd as deltas from the previous field.
+    // The first field's pointer is not written here.
+    for (int field = 1; field < numFields; field++) {
+      tvd.writeVLong(fieldPointers[field] - fieldPointers[field - 1]);
+    }
+  }
+
+  /**
+   * Writes the field number of one vector to tvd and its term count, flags and
+   * terms to tvf.
+   */
+  private void writeField(TermFreqVector vector) throws IOException {
+    final int fieldNumber = fieldInfos.fieldNumber(vector.getField());
+    tvd.writeVInt(fieldNumber);
+
+    final int numTerms = vector.size();
+    tvf.writeVInt(numTerms);
+
+    TermPositionVector tpVector = null;
+    boolean storePositions = false;
+    boolean storeOffsets = false;
+    int flags = 0;
+
+    if (vector instanceof TermPositionVector) {
+      // May have positions & offsets; the first term decides for the whole field.
+      tpVector = (TermPositionVector) vector;
+      storePositions = tpVector.size() > 0 && tpVector.getTermPositions(0) != null;
+      storeOffsets = tpVector.size() > 0 && tpVector.getOffsets(0) != null;
+      if (storePositions) {
+        flags += TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
+      }
+      if (storeOffsets) {
+        flags += TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
+      }
+    }
+
+    final byte bits = (byte) flags;
+    tvf.writeVInt(bits);
+
+    final String[] terms = vector.getTerms();
+    final int[] freqs = vector.getTermFrequencies();
+    writeTerms(terms, freqs, numTerms, tpVector, storePositions, storeOffsets);
+  }
+
+  /**
+   * Writes the terms of one field to tvf. Each term is stored as the length of
+   * the byte prefix it shares with the previously written term, the length of
+   * the remaining bytes, those bytes, and the term frequency, followed by the
+   * positions and/or offsets when requested.
+   */
+  private void writeTerms(String[] terms, int[] freqs, int numTerms,
+                          TermPositionVector tpVector,
+                          boolean storePositions, boolean storeOffsets) throws IOException {
+    UnicodeUtil.UTF8Result current = utf8Results[0];
+    UnicodeUtil.UTF8Result previous = utf8Results[1];
+    // The first term of a field has no predecessor to share a prefix with.
+    previous.length = 0;
+
+    for (int termIndex = 0; termIndex < numTerms; termIndex++) {
+      final String term = terms[termIndex];
+      UnicodeUtil.UTF16toUTF8(term, 0, term.length(), current);
+
+      final int prefixLength = StringHelper.bytesDifference(previous.result,
+                                                            previous.length,
+                                                            current.result,
+                                                            current.length);
+      final int suffixLength = current.length - prefixLength;
+      tvf.writeVInt(prefixLength);                               // shared prefix length
+      tvf.writeVInt(suffixLength);                               // delta length
+      tvf.writeBytes(current.result, prefixLength, suffixLength); // delta bytes
+
+      // The buffer just filled becomes the reference for the next term.
+      final UnicodeUtil.UTF8Result justWritten = current;
+      current = previous;
+      previous = justWritten;
+
+      final int termFreq = freqs[termIndex];
+      tvf.writeVInt(termFreq);
+
+      if (storePositions) {
+        writePositions(tpVector, termIndex, termFreq);
+      }
+      if (storeOffsets) {
+        writeOffsets(tpVector, termIndex, termFreq);
+      }
+    }
+  }
+
+  /**
+   * Writes the positions of one term to tvf, each as the difference to the
+   * position before it (the first one relative to zero).
+   */
+  private void writePositions(TermPositionVector tpVector, int termIndex, int termFreq)
+      throws IOException {
+    final int[] positions = tpVector.getTermPositions(termIndex);
+    if (positions == null) {
+      throw new IllegalStateException("Trying to write positions that are null!");
+    }
+    assert positions.length == termFreq;
+
+    int previousPosition = 0;
+    for (final int position : positions) {
+      tvf.writeVInt(position - previousPosition);
+      previousPosition = position;
+    }
+  }
+
+  /**
+   * Writes the offsets of one term to tvf. Each start offset is stored relative
+   * to the end offset of the preceding occurrence (zero for the first one), and
+   * each end offset relative to its own start offset.
+   */
+  private void writeOffsets(TermPositionVector tpVector, int termIndex, int termFreq)
+      throws IOException {
+    final TermVectorOffsetInfo[] offsets = tpVector.getOffsets(termIndex);
+    if (offsets == null) {
+      throw new IllegalStateException("Trying to write offsets that are null!");
+    }
+    assert offsets.length == termFreq;
+
+    int previousEnd = 0;
+    for (final TermVectorOffsetInfo offset : offsets) {
+      final int begin = offset.getStartOffset();
+      final int end = offset.getEndOffset();
+      tvf.writeVInt(begin - previousEnd);
+      tvf.writeVInt(end - begin);
+      previousEnd = end;
+    }
+  }
+
+  /**
+   * Do a bulk copy of numDocs documents from reader to our
+   * streams. This is used to expedite merging, if the
+   * field numbers are congruent.
+   *
+   * @param reader     source of the raw tvd and tvf bytes; its streams are
+   *                   presumably already positioned at the first document to
+   *                   copy (TODO confirm against the caller)
+   * @param tvdLengths number of tvd bytes taken by each document
+   * @param tvfLengths number of tvf bytes taken by each document
+   * @param numDocs    how many documents to copy
+   * @throws IOException if writing or copying fails
+   */
+  final void addRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs) throws IOException {
+    final long tvdStart = tvd.getFilePointer();
+    final long tvfStart = tvf.getFilePointer();
+
+    // Write the index entries first, predicting where each document will
+    // land once the raw bytes have been appended.
+    long tvdEnd = tvdStart;
+    long tvfEnd = tvfStart;
+    int doc = 0;
+    while (doc < numDocs) {
+      tvx.writeLong(tvdEnd);
+      tvdEnd += tvdLengths[doc];
+      tvx.writeLong(tvfEnd);
+      tvfEnd += tvfLengths[doc];
+      doc++;
+    }
+
+    // Then append the data of all documents in one go per stream.
+    tvd.copyBytes(reader.getTvdStream(), tvdEnd - tvdStart);
+    tvf.copyBytes(reader.getTvfStream(), tvfEnd - tvfStart);
+    assert tvd.getFilePointer() == tvdEnd;
+    assert tvf.getFilePointer() == tvfEnd;
+  }
+
+  /**
+   * Close all streams.
+   *
+   * @throws IOException if closing fails
+   */
+  final void close() throws IOException {
+    // try to close every stream; per the original note, the first exception
+    // encountered along the way is the one that gets re-thrown
+    IOUtils.close(tvx, tvd, tvf);
+  }
+}