lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java

   1 package org.apache.lucene.index;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21
  22 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  23 import org.apache.lucene.store.Directory;
  24 import org.apache.lucene.search.DocIdSet;
  25 import org.apache.lucene.search.DocIdSetIterator;
  26 import org.apache.lucene.search.Filter;
  27 import org.apache.lucene.search.TermRangeFilter;
  28 import org.apache.lucene.util.FixedBitSet;
  29 import org.apache.lucene.util.IOUtils;
  30 import org.apache.lucene.util.Version;
  31
  32 /**
  33  * Split an index based on a {@link Filter}.
  34  */
  35
  36 public class PKIndexSplitter {
  37   private final Filter docsInFirstIndex;
  38   private final Directory input;
  39   private final Directory dir1;
  40   private final Directory dir2;
  41   private final IndexWriterConfig config1;
  42   private final IndexWriterConfig config2;
  43
  44   /**
  45    * Split an index based on a {@link Filter}. All documents that match the filter
  46    * are sent to dir1, remaining ones to dir2.
  47    * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Filter)} instead.
  48    *             This constructor will be removed in Lucene 4.0.
  49    */
  50   @Deprecated
  51   public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
  52     this(Version.LUCENE_CURRENT, input, dir1, dir2, docsInFirstIndex);
  53   }
  54
  55   /**
  56    * Split an index based on a  given primary key term
  57    * and a 'middle' term.  If the middle term is present, it's
  58    * sent to dir2.
  59    * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Term)}
  60    *             instead. This constructor will be removed in Lucene 4.0.
  61    */
  62   @Deprecated
  63   public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
  64     this(input, dir1, dir2,
  65       new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false));
  66   }
  67
  68   /**
  69    * Split an index based on a {@link Filter}. All documents that match the filter
  70    * are sent to dir1, remaining ones to dir2.
  71    */
  72   public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
  73     this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
  74   }
  75
  76   private static IndexWriterConfig newDefaultConfig(Version version) {
  77     return  new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
  78   }
  79
  80   public PKIndexSplitter(Directory input, Directory dir1,
  81       Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
  82     this.input = input;
  83     this.dir1 = dir1;
  84     this.dir2 = dir2;
  85     this.docsInFirstIndex = docsInFirstIndex;
  86     this.config1 = config1;
  87     this.config2 = config2;
  88   }
  89
  90   /**
  91    * Split an index based on a  given primary key term
  92    * and a 'middle' term.  If the middle term is present, it's
  93    * sent to dir2.
  94    */
  95   public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
  96     this(version, input, dir1, dir2,
  97       new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false));
  98   }
  99
 100   public PKIndexSplitter(Directory input, Directory dir1,
 101       Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
 102     this(input, dir1, dir2,
 103       new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false), config1, config2);
 104   }
 105
 106   public void split() throws IOException {
 107     boolean success = false;
 108     IndexReader reader = IndexReader.open(input);
 109     try {
 110       // pass an individual config in here since one config can not be reused!
 111       createIndex(config1, dir1, reader, docsInFirstIndex, false);
 112       createIndex(config2, dir2, reader, docsInFirstIndex, true);
 113       success = true;
 114     } finally {
 115       if (success) {
 116         IOUtils.close(reader);
 117       } else {
 118         IOUtils.closeWhileHandlingException(reader);
 119       }
 120     }
 121   }
 122
 123   private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
 124     boolean success = false;
 125     IndexWriter w = new IndexWriter(target, config);
 126     try {
 127       w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
 128       success = true;
 129     } finally {
 130       if (success) {
 131         IOUtils.close(w);
 132       } else {
 133         IOUtils.closeWhileHandlingException(w);
 134       }
 135     }
 136   }
 137
 138   public static class DocumentFilteredIndexReader extends FilterIndexReader {
 139     final FixedBitSet readerDels;
 140     final int numDocs;
 141
 142     public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
 143       super(reader);
 144
 145       final FixedBitSet bits = new FixedBitSet(in.maxDoc());
 146       final DocIdSet docs = preserveFilter.getDocIdSet(in);
 147       if (docs != null) {
 148         final DocIdSetIterator it = docs.iterator();
 149         if (it != null) {
 150           bits.or(it);
 151         }
 152       }
 153       // this is somehow inverse, if we negate the filter, we delete all documents it matches!
 154       if (!negateFilter) {
 155         bits.flip(0, in.maxDoc());
 156       }
 157
 158       if (in.hasDeletions()) {
 159         for (int i = 0; i < in.maxDoc(); i++) {
 160           if (in.isDeleted(i)) {
 161             bits.set(i);
 162           }
 163         }
 164       }
 165
 166       this.readerDels = bits;
 167       this.numDocs = in.maxDoc() - bits.cardinality();
 168     }
 169
 170     @Override
 171     public int numDocs() {
 172       return numDocs;
 173     }
 174
 175     @Override
 176     public boolean hasDeletions() {
 177       return (in.maxDoc() != numDocs);
 178     }
 179
 180     @Override
 181     public boolean isDeleted(int n) {
 182       return readerDels.get(n);
 183     }
 184
 185     @Override
 186     public IndexReader[] getSequentialSubReaders() {
 187       return null;
 188     }
 189
 190     @Override
 191     public TermPositions termPositions() throws IOException {
 192       return new FilterTermPositions(in.termPositions()) {
 193
 194         @Override
 195         public boolean next() throws IOException {
 196           boolean res;
 197           while ((res = super.next())) {
 198             if (!readerDels.get(doc())) {
 199               break;
 200             }
 201           }
 202           return res;
 203         }
 204       };
 205     }
 206   }
 207 }