lucene-java-3.4.0/lucene/src/java/org/apache/lucene/index/MergePolicy.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import org.apache.lucene.store.Directory;
  21 import org.apache.lucene.util.SetOnce;
  22 import org.apache.lucene.util.SetOnce.AlreadySetException;
  23
  24 import java.io.IOException;
  25 import java.util.List;
  26 import java.util.ArrayList;
  27 import java.util.Map;
  28
  29 /**
  30  * <p>Expert: a MergePolicy determines the sequence of
  31  * primitive merge operations to be used for overall merge
  32  * and optimize operations.</p>
  33  *
  34  * <p>Whenever the segments in an index have been altered by
  35  * {@link IndexWriter}, either the addition of a newly
  36  * flushed segment, addition of many segments from
  37  * addIndexes* calls, or a previous merge that may now need
  38  * to cascade, {@link IndexWriter} invokes {@link
  39  * #findMerges} to give the MergePolicy a chance to pick
  40  * merges that are now required.  This method returns a
  41  * {@link MergeSpecification} instance describing the set of
  42  * merges that should be done, or null if no merges are
  43  * necessary.  When IndexWriter.optimize is called, it calls
  44  * {@link #findMergesForOptimize} and the MergePolicy should
  45  * then return the necessary merges.</p>
  46  *
  47  * <p>Note that the policy can return more than one merge at
  48  * a time.  In this case, if the writer is using {@link
  49  * SerialMergeScheduler}, the merges will be run
  50  * sequentially but if it is using {@link
  51  * ConcurrentMergeScheduler} they will be run concurrently.</p>
  52  *
  53  * <p>The default MergePolicy is {@link
  54  * TieredMergePolicy}.</p>
  55  *
  56  * @lucene.experimental
  57  */
  58
  59 public abstract class MergePolicy implements java.io.Closeable {
  60
  61   /** OneMerge provides the information necessary to perform
  62    *  an individual primitive merge operation, resulting in
  63    *  a single new segment.  The merge spec includes the
  64    *  subset of segments to be merged as well as whether the
  65    *  new segment should use the compound file format. */
  66
  67   public static class OneMerge {
  68
  69     SegmentInfo info;               // used by IndexWriter
  70     boolean optimize;               // used by IndexWriter
  71     boolean registerDone;           // used by IndexWriter
  72     long mergeGen;                  // used by IndexWriter
  73     boolean isExternal;             // used by IndexWriter
  74     int maxNumSegmentsOptimize;     // used by IndexWriter
  75     public long estimatedMergeBytes;       // used by IndexWriter
  76     List<SegmentReader> readers;        // used by IndexWriter
  77     List<SegmentReader> readerClones;   // used by IndexWriter
  78     public final List<SegmentInfo> segments;
  79     public final int totalDocCount;
  80     boolean aborted;
  81     Throwable error;
  82     boolean paused;
  83
  84     public OneMerge(List<SegmentInfo> segments) {
  85       if (0 == segments.size())
  86         throw new RuntimeException("segments must include at least one segment");
  87       // clone the list, as the in list may be based off original SegmentInfos and may be modified
  88       this.segments = new ArrayList<SegmentInfo>(segments);
  89       int count = 0;
  90       for(SegmentInfo info : segments) {
  91         count += info.docCount;
  92       }
  93       totalDocCount = count;
  94     }
  95
  96     /** Record that an exception occurred while executing
  97      *  this merge */
  98     synchronized void setException(Throwable error) {
  99       this.error = error;
 100     }
 101
 102     /** Retrieve previous exception set by {@link
 103      *  #setException}. */
 104     synchronized Throwable getException() {
 105       return error;
 106     }
 107
 108     /** Mark this merge as aborted.  If this is called
 109      *  before the merge is committed then the merge will
 110      *  not be committed. */
 111     synchronized void abort() {
 112       aborted = true;
 113       notifyAll();
 114     }
 115
 116     /** Returns true if this merge was aborted. */
 117     synchronized boolean isAborted() {
 118       return aborted;
 119     }
 120
 121     synchronized void checkAborted(Directory dir) throws MergeAbortedException {
 122       if (aborted) {
 123         throw new MergeAbortedException("merge is aborted: " + segString(dir));
 124       }
 125
 126       while (paused) {
 127         try {
 128           // In theory we could wait() indefinitely, but we
 129           // do 1000 msec, defensively
 130           wait(1000);
 131         } catch (InterruptedException ie) {
 132           throw new RuntimeException(ie);
 133         }
 134         if (aborted) {
 135           throw new MergeAbortedException("merge is aborted: " + segString(dir));
 136         }
 137       }
 138     }
 139
 140     synchronized public void setPause(boolean paused) {
 141       this.paused = paused;
 142       if (!paused) {
 143         // Wakeup merge thread, if it's waiting
 144         notifyAll();
 145       }
 146     }
 147
 148     synchronized public boolean getPause() {
 149       return paused;
 150     }
 151
 152     public String segString(Directory dir) {
 153       StringBuilder b = new StringBuilder();
 154       final int numSegments = segments.size();
 155       for(int i=0;i<numSegments;i++) {
 156         if (i > 0) b.append(' ');
 157         b.append(segments.get(i).toString(dir, 0));
 158       }
 159       if (info != null)
 160         b.append(" into ").append(info.name);
 161       if (optimize)
 162         b.append(" [optimize]");
 163       if (aborted) {
 164         b.append(" [ABORTED]");
 165       }
 166       return b.toString();
 167     }
 168
 169     /**
 170      * Returns the total size in bytes of this merge. Note that this does not
 171      * indicate the size of the merged segment, but the input total size.
 172      * */
 173     public long totalBytesSize() throws IOException {
 174       long total = 0;
 175       for (SegmentInfo info : segments) {
 176         total += info.sizeInBytes(true);
 177       }
 178       return total;
 179     }
 180
 181     /**
 182      * Returns the total number of documents that are included with this merge.
 183      * Note that this does not indicate the number of documents after the merge.
 184      * */
 185     public int totalNumDocs() throws IOException {
 186       int total = 0;
 187       for (SegmentInfo info : segments) {
 188         total += info.docCount;
 189       }
 190       return total;
 191     }
 192   }
 193
 194   /**
 195    * A MergeSpecification instance provides the information
 196    * necessary to perform multiple merges.  It simply
 197    * contains a list of {@link OneMerge} instances.
 198    */
 199
 200   public static class MergeSpecification {
 201
 202     /**
 203      * The subset of segments to be included in the primitive merge.
 204      */
 205
 206     public final List<OneMerge> merges = new ArrayList<OneMerge>();
 207
 208     public void add(OneMerge merge) {
 209       merges.add(merge);
 210     }
 211
 212     public String segString(Directory dir) {
 213       StringBuilder b = new StringBuilder();
 214       b.append("MergeSpec:\n");
 215       final int count = merges.size();
 216       for(int i=0;i<count;i++)
 217         b.append("  ").append(1 + i).append(": ").append(merges.get(i).segString(dir));
 218       return b.toString();
 219     }
 220   }
 221
 222   /** Exception thrown if there are any problems while
 223    *  executing a merge. */
 224   public static class MergeException extends RuntimeException {
 225     private Directory dir;
 226
 227     public MergeException(String message, Directory dir) {
 228       super(message);
 229       this.dir = dir;
 230     }
 231
 232     public MergeException(Throwable exc, Directory dir) {
 233       super(exc);
 234       this.dir = dir;
 235     }
 236     /** Returns the {@link Directory} of the index that hit
 237      *  the exception. */
 238     public Directory getDirectory() {
 239       return dir;
 240     }
 241   }
 242
 243   public static class MergeAbortedException extends IOException {
 244     public MergeAbortedException() {
 245       super("merge is aborted");
 246     }
 247     public MergeAbortedException(String message) {
 248       super(message);
 249     }
 250   }
 251
 252   protected final SetOnce<IndexWriter> writer;
 253
 254   /**
 255    * Creates a new merge policy instance. Note that if you intend to use it
 256    * without passing it to {@link IndexWriter}, you should call
 257    * {@link #setIndexWriter(IndexWriter)}.
 258    */
 259   public MergePolicy() {
 260     writer = new SetOnce<IndexWriter>();
 261   }
 262
 263   /**
 264    * Sets the {@link IndexWriter} to use by this merge policy. This method is
 265    * allowed to be called only once, and is usually set by IndexWriter. If it is
 266    * called more than once, {@link AlreadySetException} is thrown.
 267    *
 268    * @see SetOnce
 269    */
 270   public void setIndexWriter(IndexWriter writer) {
 271     this.writer.set(writer);
 272   }
 273
 274   /**
 275    * Determine what set of merge operations are now necessary on the index.
 276    * {@link IndexWriter} calls this whenever there is a change to the segments.
 277    * This call is always synchronized on the {@link IndexWriter} instance so
 278    * only one thread at a time will call this method.
 279    *
 280    * @param segmentInfos
 281    *          the total set of segments in the index
 282    */
 283   public abstract MergeSpecification findMerges(SegmentInfos segmentInfos)
 284       throws CorruptIndexException, IOException;
 285
 286   /**
 287    * Determine what set of merge operations is necessary in order to optimize
 288    * the index. {@link IndexWriter} calls this when its
 289    * {@link IndexWriter#optimize()} method is called. This call is always
 290    * synchronized on the {@link IndexWriter} instance so only one thread at a
 291    * time will call this method.
 292    *
 293    * @param segmentInfos
 294    *          the total set of segments in the index
 295    * @param maxSegmentCount
 296    *          requested maximum number of segments in the index (currently this
 297    *          is always 1)
 298    * @param segmentsToOptimize
 299    *          contains the specific SegmentInfo instances that must be merged
 300    *          away. This may be a subset of all
 301    *          SegmentInfos.  If the value is True for a
 302    *          given SegmentInfo, that means this segment was
 303    *          an original segment present in the
 304    *          to-be-optimized index; else, it was a segment
 305    *          produced by a cascaded merge.
 306    */
 307   public abstract MergeSpecification findMergesForOptimize(
 308           SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
 309       throws CorruptIndexException, IOException;
 310
 311   /**
 312    * Determine what set of merge operations is necessary in order to expunge all
 313    * deletes from the index.
 314    *
 315    * @param segmentInfos
 316    *          the total set of segments in the index
 317    */
 318   public abstract MergeSpecification findMergesToExpungeDeletes(
 319       SegmentInfos segmentInfos) throws CorruptIndexException, IOException;
 320
 321   /**
 322    * Release all resources for the policy.
 323    */
 324   public abstract void close();
 325
 326   /** Returns true if a new segment (regardless of its origin) should use the compound file format. */
 327   public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException;
 328 }