X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java b/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java deleted file mode 100644 index 2da66f8..0000000 --- a/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java +++ /dev/null @@ -1,267 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.index.IndexWriter; // javadoc -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.Version; - -/** - * This tool splits input index into multiple equal parts. 
The method employed - * here uses {@link IndexWriter#addIndexes(IndexReader[])} where the input data - * comes from the input index with artificially applied deletes to the document - * id-s that fall outside the selected partition. - *
Note 1: Deletes are only applied to a buffered list of deleted docs and - * don't affect the source index - this tool also works with read-only indexes. - *
Note 2: the disadvantage of this tool is that the source index needs to be - * read as many times as there are parts to be created, hence the name of this - * tool. - * - *
NOTE: this tool is unaware of documents added
- * atomically via {@link IndexWriter#addDocuments} or {@link
- * IndexWriter#updateDocuments}, which means it can easily
- * break up such document groups.
- */
-public class MultiPassIndexSplitter {
- /**
-  * Splits the source index into multiple parts, delegating to
-  * {@link #split(Version, IndexReader, Directory[], boolean)} with
-  * {@link Version#LUCENE_CURRENT} as the match version.
-  *
-  * @param input source index, can be read-only, can have deletions, can have
-  * multiple segments (or multiple readers).
-  * @param outputs list of directories where the output parts will be stored.
-  * @param seq if true, the source index is split into equal increasing ranges
-  * of document id-s. If false, source document id-s are assigned to the
-  * output splits in a deterministic round-robin fashion.
-  * @throws IOException if the delegated call throws it.
-  * @deprecated use {@link #split(Version, IndexReader, Directory[], boolean)} instead.
-  * This method will be removed in Lucene 4.0.
-  */
- @Deprecated
- public void split(IndexReader input, Directory[] outputs, boolean seq) throws IOException {
-   // This legacy entry point has no version argument, so it always falls
-   // back to the "current" match version.
-   final Version matchVersion = Version.LUCENE_CURRENT;
-   split(matchVersion, input, outputs, seq);
- }
-
- /**
-  * Split source index into multiple parts.
-  * <p>
-  * The input is wrapped in a {@code FakeDeleteIndexReader}. For every output
-  * directory the wrapper's deletions are reset, each document id outside the
-  * current part is marked as deleted on the wrapper, and the wrapper is then
-  * added to a newly created index in that directory. Progress messages are
-  * printed to {@code System.err}.
-  *
-  * @param version match version passed to the {@link IndexWriterConfig} and to
-  * the analyzer of every output writer.
-  * @param input source index, can be read-only, can have deletions, can have
-  * multiple segments (or multiple readers).
-  * @param outputs list of directories where the output parts will be stored;
-  * at least two are required.
-  * @param seq if true, then the source index will be split into equal
-  * increasing ranges of document id-s. If false, source document id-s will be
-  * assigned in a deterministic round-robin fashion to one of the output splits.
-  * @throws IOException if {@code outputs} is null or has fewer than two
-  * entries, if {@code input} is null or has fewer than two documents, or if
-  * writing one of the parts fails.
-  */
- public void split(Version version, IndexReader input, Directory[] outputs, boolean seq) throws IOException {
-   if (outputs == null || outputs.length < 2) {
-     throw new IOException("Invalid number of outputs.");
-   }
-   if (input == null || input.numDocs() < 2) {
-     throw new IOException("Not enough documents for splitting");
-   }
-   int numParts = outputs.length;
-   // wrap a potentially read-only input
-   // this way we don't have to preserve original deletions because neither
-   // deleteDocument(int) nor undeleteAll() is applied to the wrapped input index.
-   input = new FakeDeleteIndexReader(input);
-   int maxDoc = input.maxDoc();
-   int partLen = maxDoc / numParts;
-   for (int i = 0; i < numParts; i++) {
-     // start every pass from a clean slate of fake deletions
-     input.undeleteAll();
-     if (seq) { // sequential range
-       int lo = partLen * i;
-       int hi = lo + partLen;
-       // below range
-       for (int j = 0; j < lo; j++) {
-         input.deleteDocument(j);
-       }
-       // above range - last part collects all id-s that remained due to
-       // integer rounding errors
-       if (i < numParts - 1) {
-         for (int j = hi; j < maxDoc; j++) {
-           input.deleteDocument(j);
-         }
-       }
-     } else {
-       // round-robin: part i keeps exactly the ids with j % numParts == i.
-       // Comparing the remainder directly avoids the int overflow that
-       // (j + numParts - i) could hit for very large document ids.
-       for (int j = 0; j < maxDoc; j++) {
-         if (j % numParts != i) {
-           input.deleteDocument(j);
-         }
-       }
-     }
-     // use the caller-supplied match version for the analyzer as well,
-     // instead of silently falling back to LUCENE_CURRENT
-     IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
-         version,
-         new WhitespaceAnalyzer(version))
-         .setOpenMode(OpenMode.CREATE));
-     boolean success = false;
-     try {
-       System.err.println("Writing part " + (i + 1) + " ...");
-       w.addIndexes(new IndexReader[]{input});
-       success = true;
-     } finally {
-       if (success) {
-         w.close();
-       } else {
-         // Do not leave the writer (and its write lock on the output
-         // directory) open when adding the part failed; discard the
-         // partial part instead of committing it.
-         try {
-           w.rollback();
-         } catch (IOException ignored) {
-           // the failure from addIndexes is the one worth propagating
-         }
-       }
-     }
-   }
-   System.err.println("Done.");
- }
-
- @SuppressWarnings("deprecation")
- public static void main(String[] args) throws Exception {
- if (args.length < 5) {
- System.err.println("Usage: MultiPassIndexSplitter -out