lucene-java-3.5.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java

   1 package org.apache.lucene.benchmark.byTask.feeds;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.BufferedWriter;
  21 import java.io.File;
  22 import java.io.FileOutputStream;
  23 import java.io.IOException;
  24 import java.io.OutputStream;
  25 import java.io.OutputStreamWriter;
  26 import java.util.Properties;
  27
  28 import org.apache.commons.compress.compressors.CompressorStreamFactory;
  29 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  30 import org.apache.lucene.benchmark.BenchmarkTestCase;
  31 import org.apache.lucene.benchmark.byTask.PerfRunData;
  32 import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.HeaderLineParser;
  33 import org.apache.lucene.benchmark.byTask.feeds.LineDocSource.LineParser;
  34 import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
  35 import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
  36 import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
  37 import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
  38 import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
  39 import org.apache.lucene.benchmark.byTask.utils.Config;
  40 import org.apache.lucene.index.IndexReader;
  41 import org.apache.lucene.index.Term;
  42 import org.apache.lucene.search.IndexSearcher;
  43 import org.apache.lucene.search.TermQuery;
  44 import org.apache.lucene.search.TopDocs;
  45 import org.apache.lucene.util.IOUtils;
  46
  47 /** Tests the functionality of {@link LineDocSource}. */
  48 public class LineDocSourceTest extends BenchmarkTestCase {
  49
  50   private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
  51
  52   private void createBZ2LineFile(File file, boolean addHeader) throws Exception {
  53     OutputStream out = new FileOutputStream(file);
  54     out = csFactory.createCompressorOutputStream("bzip2", out);
  55     BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
  56     writeDocsToFile(writer, addHeader, null);
  57     writer.close();
  58   }
  59
  60   private void writeDocsToFile(BufferedWriter writer, boolean addHeader, Properties otherFields) throws IOException {
  61     if (addHeader) {
  62       writer.write(WriteLineDocTask.FIELDS_HEADER_INDICATOR);
  63       writer.write(WriteLineDocTask.SEP);
  64       writer.write(DocMaker.TITLE_FIELD);
  65       writer.write(WriteLineDocTask.SEP);
  66       writer.write(DocMaker.DATE_FIELD);
  67       writer.write(WriteLineDocTask.SEP);
  68       writer.write(DocMaker.BODY_FIELD);
  69       if (otherFields!=null) {
  70         // additional field names in the header
  71         for (Object fn : otherFields.keySet()) {
  72           writer.write(WriteLineDocTask.SEP);
  73           writer.write(fn.toString());
  74         }
  75       }
  76       writer.newLine();
  77     }
  78     StringBuilder doc = new StringBuilder();
  79     doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append(DocMaker.BODY_FIELD);
  80     if (otherFields!=null) {
  81       // additional field values in the doc line
  82       for (Object fv : otherFields.values()) {
  83         doc.append(WriteLineDocTask.SEP).append(fv.toString());
  84       }
  85     }
  86     writer.write(doc.toString());
  87     writer.newLine();
  88   }
  89
  90   private void createRegularLineFile(File file, boolean addHeader) throws Exception {
  91     OutputStream out = new FileOutputStream(file);
  92     BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
  93     writeDocsToFile(writer, addHeader, null);
  94     writer.close();
  95   }
  96
  97   private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception {
  98     OutputStream out = new FileOutputStream(file);
  99     BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
 100     Properties p = new Properties();
 101     for (String f : extraFields) {
 102       p.setProperty(f, f);
 103     }
 104     writeDocsToFile(writer, true, p);
 105     writer.close();
 106   }
 107
 108   private void doIndexAndSearchTest(File file, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
 109     doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions
 110     doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition
 111     doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions
 112   }
 113
 114   private void doIndexAndSearchTestWithRepeats(File file,
 115       Class<? extends LineParser> lineParserClass, int numAdds,
 116       String storedField) throws Exception {
 117
 118     IndexReader reader = null;
 119     IndexSearcher searcher = null;
 120     PerfRunData runData = null;
 121     try {
 122       Properties props = new Properties();
 123
 124       // LineDocSource specific settings.
 125       props.setProperty("docs.file", file.getAbsolutePath());
 126       if (lineParserClass != null) {
 127         props.setProperty("line.parser", lineParserClass.getName());
 128       }
 129
 130       // Indexing configuration.
 131       props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
 132       props.setProperty("content.source", LineDocSource.class.getName());
 133       props.setProperty("directory", "RAMDirectory");
 134       props.setProperty("doc.stored", "true");
 135       props.setProperty("doc.index.props", "true");
 136
 137       // Create PerfRunData
 138       Config config = new Config(props);
 139       runData = new PerfRunData(config);
 140
 141       TaskSequence tasks = new TaskSequence(runData, "testBzip2", null, false);
 142       tasks.addTask(new CreateIndexTask(runData));
 143       for (int i=0; i<numAdds; i++) {
 144         tasks.addTask(new AddDocTask(runData));
 145       }
 146       tasks.addTask(new CloseIndexTask(runData));
 147       try {
 148         tasks.doLogic();
 149       } finally {
 150         tasks.close();
 151       }
 152
 153       reader = IndexReader.open(runData.getDirectory(), true);
 154       searcher = new IndexSearcher(reader);
 155       TopDocs td = searcher.search(new TermQuery(new Term("body", "body")), 10);
 156       assertEquals(numAdds, td.totalHits);
 157       assertNotNull(td.scoreDocs[0]);
 158
 159       if (storedField==null) {
 160         storedField = DocMaker.BODY_FIELD; // added to all docs and satisfies field-name == value
 161       }
 162       assertEquals("Wrong field value", storedField, searcher.doc(0).get(storedField));
 163     } finally {
 164       IOUtils.close(searcher, reader, runData);
 165     }
 166   }
 167
 168   /* Tests LineDocSource with a bzip2 input stream. */
 169   public void testBZip2() throws Exception {
 170     File file = new File(getWorkDir(), "one-line.bz2");
 171     createBZ2LineFile(file,true);
 172     doIndexAndSearchTest(file, null, null);
 173   }
 174
 175   public void testBZip2NoHeaderLine() throws Exception {
 176     File file = new File(getWorkDir(), "one-line.bz2");
 177     createBZ2LineFile(file,false);
 178     doIndexAndSearchTest(file, null, null);
 179   }
 180
 181   public void testRegularFile() throws Exception {
 182     File file = new File(getWorkDir(), "one-line");
 183     createRegularLineFile(file,true);
 184     doIndexAndSearchTest(file, null, null);
 185   }
 186
 187   public void testRegularFileSpecialHeader() throws Exception {
 188     File file = new File(getWorkDir(), "one-line");
 189     createRegularLineFile(file,true);
 190     doIndexAndSearchTest(file, HeaderLineParser.class, null);
 191   }
 192
 193   public void testRegularFileNoHeaderLine() throws Exception {
 194     File file = new File(getWorkDir(), "one-line");
 195     createRegularLineFile(file,false);
 196     doIndexAndSearchTest(file, null, null);
 197   }
 198
 199   public void testInvalidFormat() throws Exception {
 200     String[] testCases = new String[] {
 201       "", // empty line
 202       "title", // just title
 203       "title" + WriteLineDocTask.SEP, // title + SEP
 204       "title" + WriteLineDocTask.SEP + "body", // title + SEP + body
 205       // note that title + SEP + body + SEP is a valid line, which results in an
 206       // empty body
 207     };
 208
 209     for (int i = 0; i < testCases.length; i++) {
 210       File file = new File(getWorkDir(), "one-line");
 211       BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8"));
 212       writer.write(testCases[i]);
 213       writer.newLine();
 214       writer.close();
 215       try {
 216         doIndexAndSearchTest(file, null, null);
 217         fail("Some exception should have been thrown for: [" + testCases[i] + "]");
 218       } catch (Exception e) {
 219         // expected.
 220       }
 221     }
 222   }
 223
 224   /** Doc Name is not part of the default header */
 225   public void testWithDocsName()  throws Exception {
 226     File file = new File(getWorkDir(), "one-line");
 227     createRegularLineFileWithMoreFields(file, DocMaker.NAME_FIELD);
 228     doIndexAndSearchTest(file, null, DocMaker.NAME_FIELD);
 229   }
 230
 231   /** Use fields names that are not defined in Docmaker and so will go to Properties */
 232   public void testWithProperties()  throws Exception {
 233     File file = new File(getWorkDir(), "one-line");
 234     String specialField = "mySpecialField";
 235     createRegularLineFileWithMoreFields(file, specialField);
 236     doIndexAndSearchTest(file, null, specialField);
 237   }
 238
 239 }