X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java diff --git a/lucene-java-3.5.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java b/lucene-java-3.5.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java new file mode 100644 index 0000000..210e43b --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java @@ -0,0 +1,239 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.util.Properties; + +import org.apache.commons.compress.compressors.CompressorStreamFactory; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.benchmark.BenchmarkTestCase; +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser; +import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.LineParser; +import org.apache.lucene.benchmark.byTask.tasks.AddDocTask; +import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask; +import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask; +import org.apache.lucene.benchmark.byTask.tasks.TaskSequence; +import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.util.IOUtils; + +/** Tests the functionality of {@link LineDocSource}. */ +public class LineDocSourceTest extends BenchmarkTestCase { + + private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); + + private void createBZ2LineFile(File file, boolean addHeader) throws Exception { + OutputStream out = new FileOutputStream(file); + out = csFactory.createCompressorOutputStream("bzip2", out); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + writeDocsToFile(writer, addHeader, null); + writer.close(); + } + + private void writeDocsToFile(BufferedWriter writer, boolean addHeader, Properties otherFields) throws IOException { + if (addHeader) { + writer.write(WriteLineDocTask.FIELDS_HEADER_INDICATOR); + writer.write(WriteLineDocTask.SEP); + writer.write(DocMaker.TITLE_FIELD); + writer.write(WriteLineDocTask.SEP); + writer.write(DocMaker.DATE_FIELD); + writer.write(WriteLineDocTask.SEP); + writer.write(DocMaker.BODY_FIELD); + if (otherFields!=null) { + // additional field names in the header + for (Object fn : otherFields.keySet()) { + writer.write(WriteLineDocTask.SEP); + writer.write(fn.toString()); + } + } + writer.newLine(); + } + StringBuilder doc = new StringBuilder(); + doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append(DocMaker.BODY_FIELD); + if (otherFields!=null) { + // additional field values in the doc line + for (Object fv : otherFields.values()) { + doc.append(WriteLineDocTask.SEP).append(fv.toString()); + } + } + writer.write(doc.toString()); + writer.newLine(); + } + + private void createRegularLineFile(File file, boolean addHeader) throws Exception { + OutputStream out = new FileOutputStream(file); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + writeDocsToFile(writer, addHeader, null); + writer.close(); + } + + private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception { + OutputStream out = new FileOutputStream(file); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8")); + Properties p = new Properties(); + for (String f : extraFields) { + p.setProperty(f, f); + } + writeDocsToFile(writer, true, p); + writer.close(); + } + + private void doIndexAndSearchTest(File file, Class lineParserClass, String storedField) throws Exception { + doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions + doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition + doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions + } + + private void doIndexAndSearchTestWithRepeats(File file, + Class lineParserClass, int numAdds, + String storedField) throws Exception { + + IndexReader reader = null; + IndexSearcher searcher = null; + PerfRunData runData = null; + try { + Properties props = new Properties(); + + // LineDocSource specific settings. + props.setProperty("docs.file", file.getAbsolutePath()); + if (lineParserClass != null) { + props.setProperty("line.parser", lineParserClass.getName()); + } + + // Indexing configuration. + props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); + props.setProperty("content.source", LineDocSource.class.getName()); + props.setProperty("directory", "RAMDirectory"); + props.setProperty("doc.stored", "true"); + props.setProperty("doc.index.props", "true"); + + // Create PerfRunData + Config config = new Config(props); + runData = new PerfRunData(config); + + TaskSequence tasks = new TaskSequence(runData, "testBzip2", null, false); + tasks.addTask(new CreateIndexTask(runData)); + for (int i=0; i