1 package org.apache.lucene.benchmark.byTask.tasks;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.BufferedReader;
22 import java.io.FileInputStream;
23 import java.io.InputStream;
24 import java.io.InputStreamReader;
25 import java.util.HashSet;
26 import java.util.Properties;
29 import org.apache.commons.compress.compressors.CompressorStreamFactory;
30 import org.apache.lucene.benchmark.BenchmarkTestCase;
31 import org.apache.lucene.benchmark.byTask.PerfRunData;
32 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
33 import org.apache.lucene.benchmark.byTask.utils.Config;
34 import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type;
35 import org.apache.lucene.document.Document;
36 import org.apache.lucene.document.Field;
37 import org.apache.lucene.document.Field.Index;
38 import org.apache.lucene.document.Field.Store;
40 /** Tests the functionality of {@link WriteLineDocTask}. */
41 public class WriteLineDocTaskTest extends BenchmarkTestCase {
43 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker whose {@code makeDocument()} fills a document with three fixed
 * values: "body", "title" and "date". Used by the bzip2, gzip and plain-file
 * tests.
 */
44 public static final class WriteLineDocMaker extends DocMaker {
47 public Document makeDocument() throws Exception {
48 Document doc = new Document();
// Every field is created with Store.NO and Index.NOT_ANALYZED_NO_NORMS.
49 doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
50 doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
51 doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
57 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker whose field values embed "\r\n" sequences (and, in the body, a
 * tab). Used by testCharsReplace, which expects those characters to show up
 * as single spaces in the written line.
 */
58 public static final class NewLinesDocMaker extends DocMaker {
61 public Document makeDocument() throws Exception {
62 Document doc = new Document();
// The body carries both a CR/LF pair and a tab; title and date carry only CR/LF.
63 doc.add(new Field(BODY_FIELD, "body\r\ntext\ttwo", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
64 doc.add(new Field(TITLE_FIELD, "title\r\ntext", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
65 doc.add(new Field(DATE_FIELD, "date\r\ntext", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
71 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker that adds a title and a date field but no body field.
 * Used by testEmptyBody.
 */
72 public static final class NoBodyDocMaker extends DocMaker {
74 public Document makeDocument() throws Exception {
75 Document doc = new Document();
// Deliberately no BODY_FIELD here.
76 doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
77 doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
82 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker that adds a body and a date field but no title field.
 * Used by testEmptyTitle.
 */
83 public static final class NoTitleDocMaker extends DocMaker {
85 public Document makeDocument() throws Exception {
86 Document doc = new Document();
// Deliberately no TITLE_FIELD here.
87 doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
88 doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
93 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker that adds only a date field (no title, no body).
 * Used by testJustDate.
 */
94 public static final class JustDateDocMaker extends DocMaker {
96 public Document makeDocument() throws Exception {
97 Document doc = new Document();
98 doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
103 // class has to be public so that Class.forName.newInstance() will work
104 // same as JustDate just that this one is treated as legal
/**
 * Adds only a date field, exactly like JustDateDocMaker. The difference is in
 * configuration: createPerfRunData recognizes this class by name and sets both
 * "line.fields" and "sufficient.fields" to the date field.
 */
105 public static final class LegalJustDateDocMaker extends DocMaker {
107 public Document makeDocument() throws Exception {
108 Document doc = new Document();
109 doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
114 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker that returns a document with no fields at all.
 * Used by testEmptyDoc.
 */
115 public static final class EmptyDocMaker extends DocMaker {
117 public Document makeDocument() throws Exception {
118 return new Document();
122 // class has to be public so that Class.forName.newInstance() will work
/**
 * Doc maker that tags each field value with the name of the calling thread
 * ("body_" + name, "title_" + name, "date_" + name). testMultiThreaded uses
 * the shared suffix to check that all columns of a written line come from the
 * same thread.
 */
123 public static final class ThreadingDocMaker extends DocMaker {
126 public Document makeDocument() throws Exception {
127 Document doc = new Document();
// Name of the thread that is building this document.
128 String name = Thread.currentThread().getName();
129 doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
130 doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
131 doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
// Shared factory that doReadTest uses to wrap bzip2/gzip files in a decompressing stream.
137 private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
/**
 * Builds the PerfRunData for a test from an in-memory Properties configuration.
 *
 * @param file           output line file; its absolute path is stored under "line.file.out"
 * @param allowEmptyDocs when true, "sufficient.fields" is set to ","
 * @param docMakerName   class name stored under "doc.maker"
 * @return a PerfRunData created from a Config that wraps these properties
 * @throws Exception if building the Config or the PerfRunData fails
 */
139 private PerfRunData createPerfRunData(File file,
140 boolean allowEmptyDocs,
141 String docMakerName) throws Exception {
142 Properties props = new Properties();
143 props.setProperty("doc.maker", docMakerName);
144 props.setProperty("line.file.out", file.getAbsolutePath());
145 props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
146 if (allowEmptyDocs) {
// NOTE(review): "," presumably yields an empty sufficient-fields list so that
// documents without any field are still written; confirm in WriteLineDocTask.
147 props.setProperty("sufficient.fields", ",");
// Special case: for the "legal" just-date maker, write only the date field and
// treat it as sufficient on its own.
149 if (docMakerName.equals(LegalJustDateDocMaker.class.getName())) {
150 props.setProperty("line.fields", DocMaker.DATE_FIELD);
151 props.setProperty("sufficient.fields", DocMaker.DATE_FIELD);
153 Config config = new Config(props);
154 return new PerfRunData(config);
/**
 * Reads a line file back and verifies that it holds a header line followed by
 * exactly one document line with the expected columns.
 *
 * @param file     the file to read
 * @param fileType compression type of the file; presumably selects which of the
 *                 stream-wrapping branches below runs (TODO confirm)
 * @param expTitle expected first column
 * @param expDate  expected second column
 * @param expBody  expected third column, or null when only two columns are expected
 * @throws Exception if reading fails
 */
157 private void doReadTest(File file, Type fileType, String expTitle,
158 String expDate, String expBody) throws Exception {
159 InputStream in = new FileInputStream(file);
// Compressed inputs are wrapped in the matching decompressing stream.
162 in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
165 in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
167 break; // nothing to do
169 assertFalse("Unknown file type!",true); //fail, should not happen
171 BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
// The first line must be the fields header.
173 String line = br.readLine();
174 assertHeaderLine(line);
// The second line is the single document line; split it on the task's separator char.
175 line = br.readLine();
177 String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
// Without an expected body only title and date columns are expected.
178 int numExpParts = expBody == null ? 2 : 3;
179 assertEquals(numExpParts, parts.length);
180 assertEquals(expTitle, parts[0]);
181 assertEquals(expDate, parts[1]);
182 if (expBody != null) {
183 assertEquals(expBody, parts[2]);
// No further lines may follow the single document line.
185 assertNull(br.readLine());
/**
 * Asserts that the given line begins with WriteLineDocTask.FIELDS_HEADER_INDICATOR.
 *
 * @param line the first line read from a line file
 */
191 private void assertHeaderLine(String line) {
192 assertTrue("First line should be a header line",line.startsWith(WriteLineDocTask.FIELDS_HEADER_INDICATOR));
195 /* Tests WriteLineDocTask with a bzip2 format. */
196 public void testBZip2() throws Exception {
198 // Create a document in bz2 format.
199 File file = new File(getWorkDir(), "one-line.bz2");
200 PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
201 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the file back through a bzip2 decompressor and check the single document line.
205 doReadTest(file, Type.BZIP2, "title", "date", "body");
208 /* Tests WriteLineDocTask with a gzip format. */
209 public void testGZip() throws Exception {
211 // Create a document in gz format.
212 File file = new File(getWorkDir(), "one-line.gz");
213 PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
214 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the file back through a gzip decompressor and check the single document line.
218 doReadTest(file, Type.GZIP, "title", "date", "body");
/* Tests WriteLineDocTask with a plain (uncompressed) file. */
221 public void testRegularFile() throws Exception {
223 // Create a document in regular format.
224 File file = new File(getWorkDir(), "one-line");
225 PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
226 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the file back as plain text and check the single document line.
230 doReadTest(file, Type.PLAIN, "title", "date", "body");
233 public void testCharsReplace() throws Exception {
234 // WriteLineDocTask replaced only \t characters w/ a space, since that's its
235 // separator char. However, it didn't replace newline characters, which
236 // resulted in errors in LineDocSource.
237 File file = new File(getWorkDir(), "one-line");
238 PerfRunData runData = createPerfRunData(file, false, NewLinesDocMaker.class.getName());
239 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Each "\r\n" pair and the tab in the NewLinesDocMaker values are expected as one space.
243 doReadTest(file, Type.PLAIN, "title text", "date text", "body text two");
246 public void testEmptyBody() throws Exception {
247 // WriteLineDocTask threw away documents w/ no BODY element, even if they
248 // had a TITLE element (LUCENE-1755). It should throw away documents if they
249 // don't have BODY nor TITLE
250 File file = new File(getWorkDir(), "one-line");
251 PerfRunData runData = createPerfRunData(file, false, NoBodyDocMaker.class.getName());
252 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// A null expected body makes doReadTest require exactly two columns (title and date).
256 doReadTest(file, Type.PLAIN, "title", "date", null);
/** A document without a title field is expected to be written with an empty title column. */
259 public void testEmptyTitle() throws Exception {
260 File file = new File(getWorkDir(), "one-line");
261 PerfRunData runData = createPerfRunData(file, false, NoTitleDocMaker.class.getName());
262 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Three columns are still expected; the first (title) is the empty string.
266 doReadTest(file, Type.PLAIN, "", "date", "body");
269 /** Fail by default when there's only date */
270 public void testJustDate() throws Exception {
271 File file = new File(getWorkDir(), "one-line");
// allowEmptyDocs is false and JustDateDocMaker gets no special configuration.
272 PerfRunData runData = createPerfRunData(file, false, JustDateDocMaker.class.getName());
273 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the output file directly as UTF-8 text.
277 BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
279 String line = br.readLine();
280 assertHeaderLine(line);
// Second line of the file, i.e. where a document line would appear.
281 line = br.readLine();
/**
 * Date-only documents produced by LegalJustDateDocMaker, for which
 * createPerfRunData sets "line.fields" and "sufficient.fields" to the date field.
 */
288 public void testLegalJustDate() throws Exception {
289 File file = new File(getWorkDir(), "one-line");
290 PerfRunData runData = createPerfRunData(file, false, LegalJustDateDocMaker.class.getName());
291 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the output file directly as UTF-8 text.
295 BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
297 String line = br.readLine();
298 assertHeaderLine(line);
// Second line of the file, i.e. where a document line would appear.
299 line = br.readLine();
/** Field-less documents from EmptyDocMaker, run with allowEmptyDocs set to true. */
306 public void testEmptyDoc() throws Exception {
307 File file = new File(getWorkDir(), "one-line");
// allowEmptyDocs=true makes createPerfRunData set "sufficient.fields" to ",".
308 PerfRunData runData = createPerfRunData(file, true, EmptyDocMaker.class.getName());
309 WriteLineDocTask wldt = new WriteLineDocTask(runData);
// Read the output file directly as UTF-8 text.
313 BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
315 String line = br.readLine();
316 assertHeaderLine(line);
// Second line of the file, i.e. where a document line would appear.
317 line = br.readLine();
/**
 * Shares one WriteLineDocTask between ten threads, then checks that the file
 * holds a single header plus one well-formed line per thread, with all three
 * columns of each line carrying the same thread-name suffix.
 */
324 public void testMultiThreaded() throws Exception {
325 File file = new File(getWorkDir(), "one-line");
326 PerfRunData runData = createPerfRunData(file, false, ThreadingDocMaker.class.getName());
327 final WriteLineDocTask wldt = new WriteLineDocTask(runData);
328 Thread[] threads = new Thread[10];
329 for (int i = 0; i < threads.length; i++) {
// Thread names "t0".."t9" end up in the field values via ThreadingDocMaker.
330 threads[i] = new Thread("t" + i) {
335 } catch (Exception e) {
// Rethrown unchecked, keeping the original exception as the cause.
336 throw new RuntimeException(e);
// Start all threads, then wait for every one of them to finish.
342 for (Thread t : threads) t.start();
343 for (Thread t : threads) t.join();
// NOTE(review): ids presumably collects the per-line thread suffix so the final
// size check can detect duplicate lines; confirm where it is populated.
347 Set<String> ids = new HashSet<String>();
348 BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
350 String line = br.readLine();
351 assertHeaderLine(line); // header line is written once, no matter how many threads there are
352 for (int i = 0; i < threads.length; i++) {
353 line = br.readLine();
354 String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
355 assertEquals(3, parts.length);
356 // check that all thread names written are the same in the same line
// The suffix starts at the first '_' (e.g. "_t3") and must match across all three columns.
357 String tname = parts[0].substring(parts[0].indexOf('_'));
359 assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
360 assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
362 // only threads.length lines should exist
363 assertNull(br.readLine());
// One distinct id is expected per thread.
364 assertEquals(threads.length, ids.size());