1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import org.apache.lucene.store.Directory;
21 import org.apache.lucene.store.IndexOutput;
22 import org.apache.lucene.store.IndexInput;
23 import org.apache.lucene.util.IOUtils;
25 import java.util.LinkedList;
26 import java.util.HashSet;
28 import java.io.IOException;
31 * Combines multiple files into a single compound file.
32 * The file format:<br>
34 * <li>VInt format version, followed by VInt fileCount</li>
36 * fileCount entries with the following structure:</li>
38 * <li>long dataOffset</li>
39 * <li>String fileName</li>
42 * fileCount entries with the raw data of the corresponding file</li>
45 * The fileCount integer indicates how many files are contained in this compound
46 * file. The directory that follows has that many entries. Each directory entry
47 * contains a long pointer to the start of this file's data section, and a String
48 * with that file's name.
52 public final class CompoundFileWriter {
54 private static final class FileEntry {
58 /** temporary holder for the start of directory entry for this file */
61 /** temporary holder for the start of this file's data section */
64 /** the directory which contains the file. */
68 // Before versioning started.
69 static final int FORMAT_PRE_VERSION = 0;
71 // Segment name is not written in the file names.
72 static final int FORMAT_NO_SEGMENT_PREFIX = -1;
74 // NOTE: if you introduce a new format, give it a value 1 lower
75 // than the current one, and always update FORMAT_CURRENT below
76 // when you switch to a new format!
77 static final int FORMAT_CURRENT = FORMAT_NO_SEGMENT_PREFIX;
79 private Directory directory;
80 private String fileName;
81 private HashSet<String> ids;
82 private LinkedList<FileEntry> entries;
83 private boolean merged = false;
84 private SegmentMerger.CheckAbort checkAbort;
86 /** Create the compound stream in the specified file. The file name is the
87 * entire name (no extensions are added).
88 * @throws NullPointerException if <code>dir</code> or <code>name</code> is null
90 public CompoundFileWriter(Directory dir, String name) {
91 this(dir, name, null);
94 CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
96 throw new NullPointerException("directory cannot be null");
98 throw new NullPointerException("name cannot be null");
99 this.checkAbort = checkAbort;
102 ids = new HashSet<String>();
103 entries = new LinkedList<FileEntry>();
106 /** Returns the directory of the compound file. */
107 public Directory getDirectory() {
111 /** Returns the name of the compound file. */
112 public String getName() {
116 /** Add a source stream. <code>file</code> is the string by which the
117 * sub-stream will be known in the compound stream.
119 * @throws IllegalStateException if this writer is closed
120 * @throws NullPointerException if <code>file</code> is null
121 * @throws IllegalArgumentException if a file with the same name
122 * has been added already
124 public void addFile(String file) {
125 addFile(file, directory);
129 * Same as {@link #addFile(String)}, only for files that are found in an
130 * external {@link Directory}.
132 public void addFile(String file, Directory dir) {
134 throw new IllegalStateException(
135 "Can't add extensions after merge has been called");
138 throw new NullPointerException(
139 "file cannot be null");
142 throw new IllegalArgumentException(
143 "File " + file + " already added");
145 FileEntry entry = new FileEntry();
151 /** Merge the files added up to now.
152 * All of the added files are combined sequentially into the
154 * @throws IllegalStateException if close() had been called before or
155 * if no file has been added to this object
157 public void close() throws IOException {
159 throw new IllegalStateException("Merge already performed");
161 if (entries.isEmpty())
162 throw new IllegalStateException("No entries to merge have been defined");
166 // open the compound stream
167 IndexOutput os = directory.createOutput(fileName);
168 IOException priorException = null;
170 // Write the Version info - must be a VInt because CFR reads a VInt
171 // in older versions!
172 os.writeVInt(FORMAT_CURRENT);
174 // Write the number of entries
175 os.writeVInt(entries.size());
177 // Write the directory with all offsets at 0.
178 // Remember the positions of directory entries so that we can
179 // adjust the offsets later
181 for (FileEntry fe : entries) {
182 fe.directoryOffset = os.getFilePointer();
183 os.writeLong(0); // for now
184 os.writeString(IndexFileNames.stripSegmentName(fe.file));
185 totalSize += fe.dir.fileLength(fe.file);
188 // Pre-allocate size of file as optimization --
189 // this can potentially help IO performance as
190 // we write the file and also later during
191 // searching. It also uncovers a disk-full
192 // situation earlier and hopefully without
193 // actually filling disk to 100%:
194 final long finalLength = totalSize+os.getFilePointer();
195 os.setLength(finalLength);
197 // Open the files and copy their data into the stream.
198 // Remember the locations of each file's data section.
199 for (FileEntry fe : entries) {
200 fe.dataOffset = os.getFilePointer();
204 // Write the data offsets into the directory of the compound stream
205 for (FileEntry fe : entries) {
206 os.seek(fe.directoryOffset);
207 os.writeLong(fe.dataOffset);
210 assert finalLength == os.length();
212 // Close the output stream. Set the os to null before trying to
213 // close so that if an exception occurs during the close, the
214 // finally clause below will not attempt to close the stream
216 IndexOutput tmp = os;
219 } catch (IOException e) {
222 IOUtils.closeWhileHandlingException(priorException, os);
227 * Copy the contents of the given source file entry into the provided
230 private void copyFile(FileEntry source, IndexOutput os) throws IOException {
231 IndexInput is = source.dir.openInput(source.file);
233 long startPtr = os.getFilePointer();
234 long length = is.length();
235 os.copyBytes(is, length);
237 if (checkAbort != null) {
238 checkAbort.work(length);
241 // Verify that the number of bytes written equals the original file length
242 long endPtr = os.getFilePointer();
243 long diff = endPtr - startPtr;
245 throw new IOException("Difference in the output file offsets " + diff
246 + " does not match the original file length " + length);