pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / misc / src / java / org / apache / lucene / index / PKIndexSplitter.java
diff --git a/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java b/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java

new file mode 100644 (file)

index 0000000..d517593
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
@@ -0,0 +1,207 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.TermRangeFilter;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
+
+/**
+ * Split an index based on a {@link Filter}.
+ */
+
+public class PKIndexSplitter {
+  private final Directory input;
+  private final Directory dir1;
+  private final Directory dir2;
+  private final Filter docsInFirstIndex;
+  private final IndexWriterConfig config1;
+  private final IndexWriterConfig config2;
+
+  /**
+   * Creates a splitter that routes every document accepted by the {@link Filter}
+   * to dir1 and every other document to dir2.
+   * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Filter)} instead.
+   *             This constructor will be removed in Lucene 4.0.
+   */
+  @Deprecated
+  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
+    this(Version.LUCENE_CURRENT, input, dir1, dir2, docsInFirstIndex);
+  }
+
+  /**
+   * Creates a splitter around a 'middle' primary key term. If the middle term
+   * is present, it's sent to dir2.
+   * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Term)}
+   *             instead. This constructor will be removed in Lucene 4.0.
+   */
+  @Deprecated
+  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
+    this(input, dir1, dir2, belowMidTerm(midTerm));
+  }
+
+  /** Sends documents accepted by the filter to dir1, all others to dir2; default configs are built for version. */
+  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
+    this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
+  }
+
+  /** Builds a fresh {@link OpenMode#CREATE} config for one target writer. */
+  private static IndexWriterConfig newDefaultConfig(Version version) {
+    return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
+  }
+
+  /** Builds the range filter over the middle term's field that the term-based constructors split on. */
+  private static Filter belowMidTerm(Term midTerm) {
+    return new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false);
+  }
+
+  /** Sends documents accepted by the filter to dir1 (written with config1), all others to dir2 (written with config2). */
+  public PKIndexSplitter(Directory input, Directory dir1,
+      Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
+    this.docsInFirstIndex = docsInFirstIndex;
+    this.config1 = config1;
+    this.config2 = config2;
+    this.input = input;
+    this.dir1 = dir1;
+    this.dir2 = dir2;
+  }
+
+  /** Term-based split with default configs built for version; if the middle term is present, it's sent to dir2. */
+  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
+    this(version, input, dir1, dir2, belowMidTerm(midTerm));
+  }
+
+  /** Term-based split where dir1 is written with config1 and dir2 with config2. */
+  public PKIndexSplitter(Directory input, Directory dir1,
+      Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
+    this(input, dir1, dir2, belowMidTerm(midTerm), config1, config2);
+  }
+
+  /** Opens the input index and writes filter matches into dir1, all remaining documents into dir2. */
+  public void split() throws IOException {
+    final IndexReader source = IndexReader.open(input);
+    boolean completed = false;
+    try {
+      // every target gets its own config since one config can not be reused!
+      createIndex(config1, dir1, source, docsInFirstIndex, false);
+      createIndex(config2, dir2, source, docsInFirstIndex, true);
+      completed = true;
+    } finally {
+      if (!completed) {
+        IOUtils.closeWhileHandlingException(source);
+      } else {
+        IOUtils.close(source);
+      }
+    }
+  }
+
+  /** Writes the documents of reader that survive the (optionally negated) filter into target. */
+  private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
+    final IndexWriter writer = new IndexWriter(target, config);
+    boolean completed = false;
+    try {
+      writer.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
+      completed = true;
+    } finally {
+      if (!completed) {
+        IOUtils.closeWhileHandlingException(writer);
+      } else {
+        IOUtils.close(writer);
+      }
+    }
+  }
+
+  /** Reader view that also reports as deleted every document the (optionally negated) filter rejects. */
+  public static class DocumentFilteredIndexReader extends FilterIndexReader {
+    final FixedBitSet readerDels;
+    final int numDocs;
+
+    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
+      super(reader);
+
+      final int maxDoc = in.maxDoc();
+      final FixedBitSet dropped = new FixedBitSet(maxDoc);
+      final DocIdSet matches = preserveFilter.getDocIdSet(in);
+      final DocIdSetIterator matchIt = (matches == null) ? null : matches.iterator();
+      if (matchIt != null) {
+        dropped.or(matchIt);
+      }
+      // inverted on purpose: the plain filter keeps its matches, the negated one deletes them
+      if (!negateFilter) {
+        dropped.flip(0, maxDoc);
+      }
+      // documents already deleted in the wrapped reader stay deleted
+      if (in.hasDeletions()) {
+        for (int doc = 0; doc < maxDoc; doc++) {
+          if (in.isDeleted(doc)) {
+            dropped.set(doc);
+          }
+        }
+      }
+
+      this.readerDels = dropped;
+      this.numDocs = maxDoc - dropped.cardinality();
+    }
+
+    @Override
+    public int numDocs() {
+      return numDocs;
+    }
+
+    @Override
+    public boolean hasDeletions() {
+      return numDocs != in.maxDoc();
+    }
+
+    @Override
+    public boolean isDeleted(int n) {
+      return readerDels.get(n);
+    }
+
+    @Override
+    public IndexReader[] getSequentialSubReaders() {
+      // exposes no sub-readers; presumably so this view is consumed as one reader - TODO confirm
+      return null;
+    }
+
+    @Override
+    public TermPositions termPositions() throws IOException {
+      final TermPositions wrapped = in.termPositions();
+      return new FilterTermPositions(wrapped) {
+        @Override
+        public boolean next() throws IOException {
+          // advance past every document flagged in readerDels
+          while (super.next()) {
+            if (!readerDels.get(doc())) {
+              return true;
+            }
+          }
+          return false;
+        }
+      };
+    }
+  }
+}