1 package org.apache.lucene.index;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
22 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
23 import org.apache.lucene.store.Directory;
24 import org.apache.lucene.search.DocIdSet;
25 import org.apache.lucene.search.DocIdSetIterator;
26 import org.apache.lucene.search.Filter;
27 import org.apache.lucene.search.TermRangeFilter;
28 import org.apache.lucene.util.FixedBitSet;
29 import org.apache.lucene.util.IOUtils;
30 import org.apache.lucene.util.Version;
/**
 * Split an index based on a {@link Filter}.
 */
36 public class PKIndexSplitter {
37 private final Filter docsInFirstIndex;
38 private final Directory input;
39 private final Directory dir1;
40 private final Directory dir2;
41 private final IndexWriterConfig config1;
42 private final IndexWriterConfig config2;
/**
 * Split an index based on a {@link Filter}. All documents that match the filter
 * are sent to dir1, remaining ones to dir2.
 *
 * @param input directory of the index to split
 * @param dir1 target directory for the documents matching the filter
 * @param dir2 target directory for the remaining documents
 * @param docsInFirstIndex filter selecting the documents for {@code dir1}
 * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Filter)} instead.
 * This constructor will be removed in Lucene 4.0.
 */
@Deprecated
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
  // Delegates with LUCENE_CURRENT because this legacy signature has no Version argument.
  this(Version.LUCENE_CURRENT, input, dir1, dir2, docsInFirstIndex);
}
/**
 * Split an index based on a given primary key term
 * and a 'middle' term. If the middle term is present, it's
 * sent to dir2.
 *
 * <p>The split is expressed as a {@link TermRangeFilter} on the field of
 * {@code midTerm} with no lower bound and the text of {@code midTerm} as
 * exclusive upper bound; documents matching that range go to dir1.
 *
 * @param input directory of the index to split
 * @param dir1 target directory for the documents below the middle term
 * @param dir2 target directory for the remaining documents
 * @param midTerm term marking the split point
 * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Term)}
 * instead. This constructor will be removed in Lucene 4.0.
 */
@Deprecated
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
  this(input, dir1, dir2,
      new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false));
}
/**
 * Split an index based on a {@link Filter}. All documents that match the filter
 * are sent to dir1, remaining ones to dir2.
 *
 * @param version version handed to the default writer configurations of both target indexes
 * @param input directory of the index to split
 * @param dir1 target directory for the documents matching the filter
 * @param dir2 target directory for the remaining documents
 * @param docsInFirstIndex filter selecting the documents for {@code dir1}
 */
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
  // Two separate config instances are created: one config can not be shared by two writers.
  this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
}
76 private static IndexWriterConfig newDefaultConfig(Version version) {
77 return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
/**
 * Split an index based on a {@link Filter}, using caller-supplied writer
 * configurations for the two target indexes.
 *
 * @param input directory of the index to split
 * @param dir1 target directory for the documents matching the filter
 * @param dir2 target directory for the remaining documents
 * @param docsInFirstIndex filter selecting the documents for {@code dir1}
 * @param config1 writer configuration for {@code dir1}
 * @param config2 writer configuration for {@code dir2}; must be a different instance
 *        than {@code config1}, since one config can not be reused
 */
public PKIndexSplitter(Directory input, Directory dir1,
    Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
  // All six fields are final, so every one of them has to be assigned here.
  this.input = input;
  this.dir1 = dir1;
  this.dir2 = dir2;
  this.docsInFirstIndex = docsInFirstIndex;
  this.config1 = config1;
  this.config2 = config2;
}
/**
 * Split an index based on a given primary key term
 * and a 'middle' term. If the middle term is present, it's
 * sent to dir2.
 *
 * <p>The split is expressed as a {@link TermRangeFilter} on the field of
 * {@code midTerm} with no lower bound and the text of {@code midTerm} as
 * exclusive upper bound; documents matching that range go to dir1.
 *
 * @param version version handed to the default writer configurations of both target indexes
 * @param input directory of the index to split
 * @param dir1 target directory for the documents below the middle term
 * @param dir2 target directory for the remaining documents
 * @param midTerm term marking the split point
 */
public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
  this(version, input, dir1, dir2,
      new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false));
}
/**
 * Split an index around a 'middle' term, using caller-supplied writer
 * configurations for the two target indexes.
 *
 * <p>The split is expressed as a {@link TermRangeFilter} on the field of
 * {@code midTerm} with no lower bound and the text of {@code midTerm} as
 * exclusive upper bound; documents matching that range go to dir1, so a
 * document carrying the middle term itself is sent to dir2.
 *
 * @param input directory of the index to split
 * @param dir1 target directory for the documents below the middle term
 * @param dir2 target directory for the remaining documents
 * @param midTerm term marking the split point
 * @param config1 writer configuration for {@code dir1}
 * @param config2 writer configuration for {@code dir2}
 */
public PKIndexSplitter(Directory input, Directory dir1,
    Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
  this(input, dir1, dir2,
      new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false), config1, config2);
}
106 public void split() throws IOException {
107 boolean success = false;
108 IndexReader reader = IndexReader.open(input);
110 // pass an individual config in here since one config can not be reused!
111 createIndex(config1, dir1, reader, docsInFirstIndex, false);
112 createIndex(config2, dir2, reader, docsInFirstIndex, true);
116 IOUtils.close(reader);
118 IOUtils.closeWhileHandlingException(reader);
123 private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
124 boolean success = false;
125 IndexWriter w = new IndexWriter(target, config);
127 w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
133 IOUtils.closeWhileHandlingException(w);
138 public static class DocumentFilteredIndexReader extends FilterIndexReader {
139 final FixedBitSet readerDels;
/**
 * Wraps {@code reader} and reports as deleted every document that is not
 * selected by the filter, in addition to the documents the wrapped reader
 * already reports as deleted.
 *
 * @param reader reader to wrap
 * @param preserveFilter filter deciding which documents stay visible
 * @param negateFilter if {@code false}, the documents matching the filter stay visible;
 *        if {@code true}, the matching documents are hidden instead
 * @throws IOException if evaluating the filter against the reader fails
 */
public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
  super(reader);

  final int maxDoc = in.maxDoc();
  final FixedBitSet bits = new FixedBitSet(maxDoc);

  // Collect the filter matches; both the set and its iterator may be null when nothing matches.
  final DocIdSet docs = preserveFilter.getDocIdSet(in);
  if (docs != null) {
    final DocIdSetIterator it = docs.iterator();
    if (it != null) {
      bits.or(it);
    }
  }

  // this is somehow inverse, if we negate the filter, we delete all documents it matches!
  // The bits mark deleted documents, so the matches are flipped when they are to be kept.
  if (!negateFilter) {
    bits.flip(0, maxDoc);
  }

  // Documents already deleted in the wrapped reader stay deleted.
  if (in.hasDeletions()) {
    for (int i = 0; i < maxDoc; i++) {
      if (in.isDeleted(i)) {
        bits.set(i);
      }
    }
  }

  this.readerDels = bits;
  this.numDocs = maxDoc - bits.cardinality();
}
171 public int numDocs() {
176 public boolean hasDeletions() {
177 return (in.maxDoc() != numDocs);
181 public boolean isDeleted(int n) {
182 return readerDels.get(n);
186 public IndexReader[] getSequentialSubReaders() {
191 public TermPositions termPositions() throws IOException {
192 return new FilterTermPositions(in.termPositions()) {
195 public boolean next() throws IOException {
197 while ((res = super.next())) {
198 if (!readerDels.get(doc())) {