1 package org.apache.lucene.benchmark.byTask.feeds;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Properties;

import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser;
import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.LineParser;
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.IOUtils;

47 /** Tests the functionality of {@link LineDocSource}. */
48 public class LineDocSourceTest extends BenchmarkTestCase {
50 private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
52 private void createBZ2LineFile(File file, boolean addHeader) throws Exception {
53 OutputStream out = new FileOutputStream(file);
54 out = csFactory.createCompressorOutputStream("bzip2", out);
55 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
56 writeDocsToFile(writer, addHeader, null);
60 private void writeDocsToFile(BufferedWriter writer, boolean addHeader, Properties otherFields) throws IOException {
62 writer.write(WriteLineDocTask.FIELDS_HEADER_INDICATOR);
63 writer.write(WriteLineDocTask.SEP);
64 writer.write(DocMaker.TITLE_FIELD);
65 writer.write(WriteLineDocTask.SEP);
66 writer.write(DocMaker.DATE_FIELD);
67 writer.write(WriteLineDocTask.SEP);
68 writer.write(DocMaker.BODY_FIELD);
69 if (otherFields!=null) {
70 // additional field names in the header
71 for (Object fn : otherFields.keySet()) {
72 writer.write(WriteLineDocTask.SEP);
73 writer.write(fn.toString());
78 StringBuilder doc = new StringBuilder();
79 doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append(DocMaker.BODY_FIELD);
80 if (otherFields!=null) {
81 // additional field values in the doc line
82 for (Object fv : otherFields.values()) {
83 doc.append(WriteLineDocTask.SEP).append(fv.toString());
86 writer.write(doc.toString());
90 private void createRegularLineFile(File file, boolean addHeader) throws Exception {
91 OutputStream out = new FileOutputStream(file);
92 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
93 writeDocsToFile(writer, addHeader, null);
97 private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception {
98 OutputStream out = new FileOutputStream(file);
99 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
100 Properties p = new Properties();
101 for (String f : extraFields) {
104 writeDocsToFile(writer, true, p);
108 private void doIndexAndSearchTest(File file, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
109 doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions
110 doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition
111 doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions
114 private void doIndexAndSearchTestWithRepeats(File file,
115 Class<? extends LineParser> lineParserClass, int numAdds,
116 String storedField) throws Exception {
118 IndexReader reader = null;
119 IndexSearcher searcher = null;
120 PerfRunData runData = null;
122 Properties props = new Properties();
124 // LineDocSource specific settings.
125 props.setProperty("docs.file", file.getAbsolutePath());
126 if (lineParserClass != null) {
127 props.setProperty("line.parser", lineParserClass.getName());
130 // Indexing configuration.
131 props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
132 props.setProperty("content.source", LineDocSource.class.getName());
133 props.setProperty("directory", "RAMDirectory");
134 props.setProperty("doc.stored", "true");
135 props.setProperty("doc.index.props", "true");
137 // Create PerfRunData
138 Config config = new Config(props);
139 runData = new PerfRunData(config);
141 TaskSequence tasks = new TaskSequence(runData, "testBzip2", null, false);
142 tasks.addTask(new CreateIndexTask(runData));
143 for (int i=0; i<numAdds; i++) {
144 tasks.addTask(new AddDocTask(runData));
146 tasks.addTask(new CloseIndexTask(runData));
153 reader = IndexReader.open(runData.getDirectory(), true);
154 searcher = new IndexSearcher(reader);
155 TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
156 assertEquals(numAdds, td.totalHits);
157 assertNotNull(td.scoreDocs[0]);
159 if (storedField==null) {
160 storedField = DocMaker.BODY_FIELD; // added to all docs and satisfies field-name == value
162 assertEquals("Wrong field value", storedField, searcher.doc(0).get(storedField));
164 IOUtils.close(searcher, reader, runData);
168 /* Tests LineDocSource with a bzip2 input stream. */
169 public void testBZip2() throws Exception {
170 File file = new File(getWorkDir(), "one-line.bz2");
171 createBZ2LineFile(file,true);
172 doIndexAndSearchTest(file, null, null);
175 public void testBZip2NoHeaderLine() throws Exception {
176 File file = new File(getWorkDir(), "one-line.bz2");
177 createBZ2LineFile(file,false);
178 doIndexAndSearchTest(file, null, null);
181 public void testRegularFile() throws Exception {
182 File file = new File(getWorkDir(), "one-line");
183 createRegularLineFile(file,true);
184 doIndexAndSearchTest(file, null, null);
187 public void testRegularFileSpecialHeader() throws Exception {
188 File file = new File(getWorkDir(), "one-line");
189 createRegularLineFile(file,true);
190 doIndexAndSearchTest(file, HeaderLineParser.class, null);
193 public void testRegularFileNoHeaderLine() throws Exception {
194 File file = new File(getWorkDir(), "one-line");
195 createRegularLineFile(file,false);
196 doIndexAndSearchTest(file, null, null);
199 public void testInvalidFormat() throws Exception {
200 String[] testCases = new String[] {
202 "title", // just title
203 "title" + WriteLineDocTask.SEP, // title + SEP
204 "title" + WriteLineDocTask.SEP + "body", // title + SEP + body
205 // note that title + SEP + body + SEP is a valid line, which results in an
209 for (int i = 0; i < testCases.length; i++) {
210 File file = new File(getWorkDir(), "one-line");
211 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8"));
212 writer.write(testCases[i]);
216 doIndexAndSearchTest(file, null, null);
217 fail("Some exception should have been thrown for: [" + testCases[i] + "]");
218 } catch (Exception e) {
224 /** Doc Name is not part of the default header */
225 public void testWithDocsName() throws Exception {
226 File file = new File(getWorkDir(), "one-line");
227 createRegularLineFileWithMoreFields(file, DocMaker.NAME_FIELD);
228 doIndexAndSearchTest(file, null, DocMaker.NAME_FIELD);
231 /** Use fields names that are not defined in Docmaker and so will go to Properties */
232 public void testWithProperties() throws Exception {
233 File file = new File(getWorkDir(), "one-line");
234 String specialField = "mySpecialField";
235 createRegularLineFileWithMoreFields(file, specialField);
236 doIndexAndSearchTest(file, null, specialField);