lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.store.Directory;
  21 import org.apache.lucene.store.IndexOutput;
  22 import org.apache.lucene.store.IndexInput;
  23 import org.apache.lucene.util.IOUtils;
  24
  25 import java.util.LinkedList;
  26 import java.util.HashSet;
  27
  28 import java.io.IOException;
  29
  30 /**
  31  * Combines multiple files into a single compound file.
  32  * The file format:<br>
  33  * <ul>
  34  *     <li>VInt fileCount</li>
  35  *     <li>{Directory}
  36  *         fileCount entries with the following structure:</li>
  37  *         <ul>
  38  *             <li>long dataOffset</li>
  39  *             <li>String fileName</li>
  40  *         </ul>
  41  *     <li>{File Data}
  42  *         fileCount entries with the raw data of the corresponding file</li>
  43  * </ul>
  44  *
  45  * The fileCount integer indicates how many files are contained in this compound
  46  * file. The {directory} that follows has that many entries. Each directory entry
  47  * contains a long pointer to the start of this file's data section, and a String
  48  * with that file's name.
  49  *
  50  * @lucene.internal
  51  */
  52 public final class CompoundFileWriter {
  53
  54     private static final class FileEntry {
  55         /** source file */
  56         String file;
  57
  58         /** temporary holder for the start of directory entry for this file */
  59         long directoryOffset;
  60
  61         /** temporary holder for the start of this file's data section */
  62         long dataOffset;
  63
  64         /** the directory which contains the file. */
  65         Directory dir;
  66     }
  67
  68     // Before versioning started.
  69     static final int FORMAT_PRE_VERSION = 0;
  70
  71     // Segment name is not written in the file names.
  72     static final int FORMAT_NO_SEGMENT_PREFIX = -1;
  73
  74     // NOTE: if you introduce a new format, make it 1 lower
  75     // than the current one, and always change this if you
  76     // switch to a new format!
  77     static final int FORMAT_CURRENT = FORMAT_NO_SEGMENT_PREFIX;
  78
  79     private Directory directory;
  80     private String fileName;
  81     private HashSet<String> ids;
  82     private LinkedList<FileEntry> entries;
  83     private boolean merged = false;
  84     private SegmentMerger.CheckAbort checkAbort;
  85
  86     /** Create the compound stream in the specified file. The file name is the
  87      *  entire name (no extensions are added).
  88      *  @throws NullPointerException if <code>dir</code> or <code>name</code> is null
  89      */
  90     public CompoundFileWriter(Directory dir, String name) {
  91       this(dir, name, null);
  92     }
  93
  94     CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
  95         if (dir == null)
  96             throw new NullPointerException("directory cannot be null");
  97         if (name == null)
  98             throw new NullPointerException("name cannot be null");
  99         this.checkAbort = checkAbort;
 100         directory = dir;
 101         fileName = name;
 102         ids = new HashSet<String>();
 103         entries = new LinkedList<FileEntry>();
 104     }
 105
 106     /** Returns the directory of the compound file. */
 107     public Directory getDirectory() {
 108         return directory;
 109     }
 110
 111     /** Returns the name of the compound file. */
 112     public String getName() {
 113         return fileName;
 114     }
 115
 116     /** Add a source stream. <code>file</code> is the string by which the
 117      *  sub-stream will be known in the compound stream.
 118      *
 119      *  @throws IllegalStateException if this writer is closed
 120      *  @throws NullPointerException if <code>file</code> is null
 121      *  @throws IllegalArgumentException if a file with the same name
 122      *   has been added already
 123      */
 124     public void addFile(String file) {
 125       addFile(file, directory);
 126     }
 127
 128     /**
 129      * Same as {@link #addFile(String)}, only for files that are found in an
 130      * external {@link Directory}.
 131      */
 132     public void addFile(String file, Directory dir) {
 133         if (merged)
 134             throw new IllegalStateException(
 135                 "Can't add extensions after merge has been called");
 136
 137         if (file == null)
 138             throw new NullPointerException(
 139                 "file cannot be null");
 140
 141         if (! ids.add(file))
 142             throw new IllegalArgumentException(
 143                 "File " + file + " already added");
 144
 145         FileEntry entry = new FileEntry();
 146         entry.file = file;
 147         entry.dir = dir;
 148         entries.add(entry);
 149     }
 150
 151     /** Merge files with the extensions added up to now.
 152      *  All files with these extensions are combined sequentially into the
 153      *  compound stream.
 154      *  @throws IllegalStateException if close() had been called before or
 155      *   if no file has been added to this object
 156      */
 157     public void close() throws IOException {
 158         if (merged)
 159             throw new IllegalStateException("Merge already performed");
 160
 161         if (entries.isEmpty())
 162             throw new IllegalStateException("No entries to merge have been defined");
 163
 164         merged = true;
 165
 166         // open the compound stream
 167         IndexOutput os = directory.createOutput(fileName);
 168         IOException priorException = null;
 169         try {
 170             // Write the Version info - must be a VInt because CFR reads a VInt
 171             // in older versions!
 172             os.writeVInt(FORMAT_CURRENT);
 173
 174             // Write the number of entries
 175             os.writeVInt(entries.size());
 176
 177             // Write the directory with all offsets at 0.
 178             // Remember the positions of directory entries so that we can
 179             // adjust the offsets later
 180             long totalSize = 0;
 181             for (FileEntry fe : entries) {
 182                 fe.directoryOffset = os.getFilePointer();
 183                 os.writeLong(0);    // for now
 184                 os.writeString(IndexFileNames.stripSegmentName(fe.file));
 185                 totalSize += fe.dir.fileLength(fe.file);
 186             }
 187
 188             // Pre-allocate size of file as optimization --
 189             // this can potentially help IO performance as
 190             // we write the file and also later during
 191             // searching.  It also uncovers a disk-full
 192             // situation earlier and hopefully without
 193             // actually filling disk to 100%:
 194             final long finalLength = totalSize+os.getFilePointer();
 195             os.setLength(finalLength);
 196
 197             // Open the files and copy their data into the stream.
 198             // Remember the locations of each file's data section.
 199             for (FileEntry fe : entries) {
 200                 fe.dataOffset = os.getFilePointer();
 201                 copyFile(fe, os);
 202             }
 203
 204             // Write the data offsets into the directory of the compound stream
 205             for (FileEntry fe : entries) {
 206                 os.seek(fe.directoryOffset);
 207                 os.writeLong(fe.dataOffset);
 208             }
 209
 210             assert finalLength == os.length();
 211
 212             // Close the output stream. Set the os to null before trying to
 213             // close so that if an exception occurs during the close, the
 214             // finally clause below will not attempt to close the stream
 215             // the second time.
 216             IndexOutput tmp = os;
 217             os = null;
 218             tmp.close();
 219         } catch (IOException e) {
 220           priorException = e;
 221         } finally {
 222           IOUtils.closeWhileHandlingException(priorException, os);
 223         }
 224     }
 225
 226   /**
 227    * Copy the contents of the file with specified extension into the provided
 228    * output stream.
 229    */
 230   private void copyFile(FileEntry source, IndexOutput os) throws IOException {
 231     IndexInput is = source.dir.openInput(source.file);
 232     try {
 233       long startPtr = os.getFilePointer();
 234       long length = is.length();
 235       os.copyBytes(is, length);
 236
 237       if (checkAbort != null) {
 238         checkAbort.work(length);
 239       }
 240
 241       // Verify that the output length diff is equal to original file
 242       long endPtr = os.getFilePointer();
 243       long diff = endPtr - startPtr;
 244       if (diff != length)
 245         throw new IOException("Difference in the output file offsets " + diff
 246             + " does not match the original file length " + length);
 247
 248     } finally {
 249       is.close();
 250     }
 251   }
 252 }