+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.TermRangeFilter;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
-
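-/**
- * Splits an index into two new indexes based on a {@link Filter}: documents
- * that match the filter are written to the first target directory, all
- * remaining documents are written to the second.
- */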
-
-public class PKIndexSplitter {
-  private final Filter docsInFirstIndex;
-  private final Directory input;
-  private final Directory dir1;
-  private final Directory dir2;
-  private final IndexWriterConfig config1;
-  private final IndexWriterConfig config2;
-
-  /**
-   * Split an index based on a {@link Filter}. All documents that match the filter
-   * are sent to dir1, remaining ones to dir2.
-   * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Filter)} instead.
-   * This constructor will be removed in Lucene 4.0.
-   */
-  @Deprecated
-  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
-    this(Version.LUCENE_CURRENT, input, dir1, dir2, docsInFirstIndex);
-  }
-
-  /**
-   * Split an index based on a given primary key term
-   * and a 'middle' term. If the middle term is present, it's
-   * sent to dir2.
-   * @deprecated use {@link #PKIndexSplitter(Version, Directory, Directory, Directory, Term)}
-   * instead. This constructor will be removed in Lucene 4.0.
-   */
-  @Deprecated
-  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
-    this(input, dir1, dir2, belowMidTerm(midTerm));
-  }
-
-  /**
-   * Split an index based on a {@link Filter}. All documents that match the filter
-   * are sent to dir1, remaining ones to dir2. Each target index is written with
-   * its own freshly created default {@link IndexWriterConfig}.
-   *
-   * @param version the version handed to the default writer configurations
-   * @param input the directory holding the index to split
-   * @param dir1 target directory for documents matching the filter
-   * @param dir2 target directory for all other documents
-   * @param docsInFirstIndex filter selecting the documents that go to dir1
-   */
-  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) {
-    this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version));
-  }
-
-  /**
-   * Creates the writer configuration used when the caller supplies none:
-   * a null second constructor argument and {@link OpenMode#CREATE}.
-   */
-  private static IndexWriterConfig newDefaultConfig(Version version) {
-    return new IndexWriterConfig(version, null).setOpenMode(OpenMode.CREATE);
-  }
-
-  /**
-   * Builds the range filter shared by all {@link Term}-based constructors:
-   * an open lower bound and {@code midTerm} as the excluded upper bound on
-   * the term's field.
-   */
-  private static Filter belowMidTerm(Term midTerm) {
-    return new TermRangeFilter(midTerm.field(), null, midTerm.text(), true, false);
-  }
-
-  /**
-   * Split an index based on a {@link Filter}, using caller-supplied writer
-   * configurations. All documents that match the filter are sent to dir1,
-   * remaining ones to dir2.
-   *
-   * @param input the directory holding the index to split
-   * @param dir1 target directory for documents matching the filter
-   * @param dir2 target directory for all other documents
-   * @param docsInFirstIndex filter selecting the documents that go to dir1
-   * @param config1 configuration for the writer on dir1
-   * @param config2 configuration for the writer on dir2; pass a separate
-   *        instance from config1, since one config can not be reused
-   */
-  public PKIndexSplitter(Directory input, Directory dir1,
-      Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
-    this.input = input;
-    this.dir1 = dir1;
-    this.dir2 = dir2;
-    this.docsInFirstIndex = docsInFirstIndex;
-    this.config1 = config1;
-    this.config2 = config2;
-  }
-
-  /**
-   * Split an index based on a given primary key term
-   * and a 'middle' term. If the middle term is present, it's
-   * sent to dir2.
-   */
-  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) {
-    this(version, input, dir1, dir2, belowMidTerm(midTerm));
-  }
-
-  /**
-   * Split an index based on a given primary key term and a 'middle' term,
-   * using caller-supplied writer configurations. If the middle term is
-   * present, it's sent to dir2.
-   *
-   * @param config1 configuration for the writer on dir1
-   * @param config2 configuration for the writer on dir2; pass a separate
-   *        instance from config1, since one config can not be reused
-   */
-  public PKIndexSplitter(Directory input, Directory dir1,
-      Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
-    this(input, dir1, dir2, belowMidTerm(midTerm), config1, config2);
-  }
-
-  /**
-   * Performs the split: opens a reader on the input directory, writes the
-   * documents matching the filter to dir1 and then the remaining documents
-   * to dir2, and finally closes the reader.
-   *
-   * @throws IOException if reading the input or writing either target fails
-   */
-  public void split() throws IOException {
-    boolean success = false;
-    IndexReader reader = IndexReader.open(input);
-    try {
-      // pass an individual config in here since one config can not be reused!
-      createIndex(config1, dir1, reader, docsInFirstIndex, false);
-      createIndex(config2, dir2, reader, docsInFirstIndex, true);
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(reader);
-      } else {
-        // a failure is already propagating; close through the exception-handling variant
-        IOUtils.closeWhileHandlingException(reader);
-      }
-    }
-  }
-
-  /**
-   * Writes one half of the split: wraps {@code reader} in a
-   * {@link DocumentFilteredIndexReader} and adds it to a new writer on
-   * {@code target}.
-   *
-   * @param negateFilter false to keep the documents matching
-   *        {@code preserveFilter}, true to keep the ones that do not match
-   */
-  private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-    boolean success = false;
-    IndexWriter w = new IndexWriter(target, config);
-    try {
-      w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter));
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(w);
-      } else {
-        IOUtils.closeWhileHandlingException(w);
-      }
-    }
-  }
-
-  /**
-   * A view of a reader in which every document not selected for the current
-   * half of the split is reported as deleted, in addition to the documents
-   * the wrapped reader already reports as deleted.
-   */
-  public static class DocumentFilteredIndexReader extends FilterIndexReader {
-    /** Set bit = document is hidden (filtered out or deleted in the wrapped reader). */
-    final FixedBitSet readerDels;
-    /** Number of documents that remain visible through this view. */
-    final int numDocs;
-
-    /**
-     * @param reader the reader to wrap
-     * @param preserveFilter filter deciding which documents are kept
-     * @param negateFilter false to keep the documents matching the filter,
-     *        true to keep the documents that do not match it
-     * @throws IOException if evaluating the filter fails
-     */
-    public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
-      super(reader);
-
-      // maxDoc is needed several times below; read it once
-      final int maxDoc = in.maxDoc();
-      final FixedBitSet bits = new FixedBitSet(maxDoc);
-      final DocIdSet docs = preserveFilter.getDocIdSet(in);
-      if (docs != null) {
-        final DocIdSetIterator it = docs.iterator();
-        if (it != null) {
-          bits.or(it);
-        }
-      }
-      // this is somehow inverse, if we negate the filter, we delete all documents it matches!
-      if (!negateFilter) {
-        bits.flip(0, maxDoc);
-      }
-
-      // carry over the deletions of the wrapped reader
-      if (in.hasDeletions()) {
-        for (int i = 0; i < maxDoc; i++) {
-          if (in.isDeleted(i)) {
-            bits.set(i);
-          }
-        }
-      }
-
-      this.readerDels = bits;
-      this.numDocs = maxDoc - bits.cardinality();
-    }
-
-    @Override
-    public int numDocs() {
-      return numDocs;
-    }
-
-    @Override
-    public boolean hasDeletions() {
-      return (in.maxDoc() != numDocs);
-    }
-
-    @Override
-    public boolean isDeleted(int n) {
-      return readerDels.get(n);
-    }
-
-    /** Always returns null, so no sub-readers of the wrapped reader are exposed. */
-    @Override
-    public IndexReader[] getSequentialSubReaders() {
-      return null;
-    }
-
-    /**
-     * Returns the wrapped reader's term positions with every hidden document
-     * skipped, whichever way the enumeration is advanced.
-     */
-    @Override
-    public TermPositions termPositions() throws IOException {
-      return new FilterTermPositions(in.termPositions()) {
-
-        @Override
-        public boolean next() throws IOException {
-          boolean res;
-          while ((res = super.next())) {
-            if (!readerDels.get(doc())) {
-              break;
-            }
-          }
-          return res;
-        }
-
-        @Override
-        public boolean skipTo(int target) throws IOException {
-          if (!super.skipTo(target)) {
-            return false;
-          }
-          // the landing document may itself be hidden; if so move on to the next visible one
-          return !readerDels.get(doc()) || next();
-        }
-
-        @Override
-        public int read(int[] docs, int[] freqs) throws IOException {
-          // go through the filtering next() instead of the wrapped bulk read,
-          // which would hand back hidden documents
-          int count = 0;
-          while (count < docs.length && next()) {
-            docs[count] = doc();
-            freqs[count] = freq();
-            count++;
-          }
-          return count;
-        }
-      };
-    }
-  }
-}