X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
diff --git a/lucene-java-3.4.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/lucene-java-3.4.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
deleted file mode 100644
index eff7758..0000000
--- a/lucene-java-3.4.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
+++ /dev/null
@@ -1,210 +0,0 @@
-package org.apache.lucene.benchmark.byTask.tasks;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-/**
- * A task which writes documents, one line per document. Each line is in the
- * following format: title <TAB> date <TAB> body. The output of this
- * task can be consumed by
- * {@link org.apache.lucene.benchmark.byTask.feeds.LineDocSource} and is intended
- * to save the IO overhead of opening a file per document to be indexed.
- *
- * The format of the output is set according to the output file extension.
- * Compression is recommended when the output file is expected to be large.
- * See info on file extensions in
- * {@link org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type}
- *
- * Supports the following parameters:
- *
- * - line.file.out - the name of the file to write the output to. That
- * parameter is mandatory. NOTE: the file is re-created.
- *
- line.fields - which fields should be written in each line.
- * (optional, default: {@link #DEFAULT_FIELDS}).
- *
- sufficient.fields - list of field names, separated by comma, which,
- * if all of them are missing, the document will be skipped. For example, to require
- * that at least one of f1,f2 is not empty, specify: "f1,f2" in this field. To specify
- * that no field is required, i.e. that even empty docs should be emitted, specify ",".
- * (optional, default: {@link #DEFAULT_SUFFICIENT_FIELDS}).
- *
- * NOTE: this class is not thread-safe and if used by multiple threads the
- * output is unspecified (as all will write to the same output file in a
- * non-synchronized way).
- */
-public class WriteLineDocTask extends PerfTask {
-
- public static final String FIELDS_HEADER_INDICATOR = "FIELDS_HEADER_INDICATOR###";
-
- public final static char SEP = '\t';
-
- /**
- * Fields to be written by default
- */
- public static final String[] DEFAULT_FIELDS = new String[] {
- DocMaker.TITLE_FIELD,
- DocMaker.DATE_FIELD,
- DocMaker.BODY_FIELD,
- };
-
- /**
- * Default fields which at least one of them is required to not skip the doc.
- */
- public static final String DEFAULT_SUFFICIENT_FIELDS = DocMaker.TITLE_FIELD +',' + DocMaker.BODY_FIELD;
-
- private int docSize = 0;
- private PrintWriter lineFileOut = null;
- private DocMaker docMaker;
- private ThreadLocal threadBuffer = new ThreadLocal();
- private ThreadLocal threadNormalizer = new ThreadLocal();
- private final String[] fieldsToWrite;;
- private final boolean[] sufficientFields;
- private final boolean checkSufficientFields;
-
- public WriteLineDocTask(PerfRunData runData) throws Exception {
- super(runData);
- Config config = runData.getConfig();
- String fname = config.get("line.file.out", null);
- if (fname == null) {
- throw new IllegalArgumentException("line.file.out must be set");
- }
- OutputStream out = StreamUtils.outputStream(new File(fname));
- lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE));
- docMaker = runData.getDocMaker();
-
- // init fields
- String f2r = config.get("line.fields",null);
- if (f2r == null) {
- fieldsToWrite = DEFAULT_FIELDS;
- } else {
- if (f2r.indexOf(SEP)>=0) {
- throw new IllegalArgumentException("line.fields "+f2r+" should not contain the separator char: "+SEP);
- }
- fieldsToWrite = f2r.split(",");
- }
-
- // init sufficient fields
- sufficientFields = new boolean[fieldsToWrite.length];
- String suff = config.get("sufficient.fields",DEFAULT_SUFFICIENT_FIELDS);
- if (",".equals(suff)) {
- checkSufficientFields = false;
- } else {
- checkSufficientFields = true;
- HashSet sf = new HashSet(Arrays.asList(suff.split(",")));
- for (int i=0; i 0 ? docMaker.makeDocument(docSize) : docMaker.makeDocument();
-
- Matcher matcher = threadNormalizer.get();
- if (matcher == null) {
- matcher = Pattern.compile("[\t\r\n]+").matcher("");
- threadNormalizer.set(matcher);
- }
-
- StringBuilder sb = threadBuffer.get();
- if (sb == null) {
- sb = new StringBuilder();
- threadBuffer.set(sb);
- }
- sb.setLength(0);
-
- boolean sufficient = !checkSufficientFields;
- for (int i=0; i0 && sufficientFields[i];
- }
- if (sufficient) {
- sb.setLength(sb.length()-1); // remove redundant last separator
- // lineFileOut is a PrintWriter, which synchronizes internally in println.
- lineFileOut.println(sb.toString());
- }
-
- return 1;
- }
-
- @Override
- public void close() throws Exception {
- lineFileOut.close();
- super.close();
- }
-
- /**
- * Set the params (docSize only)
- * @param params docSize, or 0 for no limit.
- */
- @Override
- public void setParams(String params) {
- super.setParams(params);
- docSize = (int) Float.parseFloat(params);
- }
-
- @Override
- public boolean supportsParams() {
- return true;
- }
-
-}