1 package org.apache.lucene.benchmark.byTask.tasks;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Reader;
21 import java.util.List;
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.TokenStream;
25 import org.apache.lucene.benchmark.byTask.PerfRunData;
26 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Fieldable;
29 import org.apache.lucene.document.NumericField;
32 * Simple task to test performance of tokenizers. It just
33 * creates a token stream for each field of the document and
34 * read all tokens out of that stream.
36 public class ReadTokensTask extends PerfTask {
38 public ReadTokensTask(PerfRunData runData) {
42 private int totalTokenCount = 0;
44 // volatile data passed between setup(), doLogic(), tearDown().
45 private Document doc = null;
48 public void setup() throws Exception {
50 DocMaker docMaker = getRunData().getDocMaker();
51 doc = docMaker.makeDocument();
55 protected String getLogMessage(int recsCount) {
56 return "read " + recsCount + " docs; " + totalTokenCount + " tokens";
60 public void tearDown() throws Exception {
66 public int doLogic() throws Exception {
67 List<Fieldable> fields = doc.getFields();
68 Analyzer analyzer = getRunData().getAnalyzer();
70 for(final Fieldable field : fields) {
71 if (!field.isTokenized() || field instanceof NumericField) continue;
73 final TokenStream stream;
74 final TokenStream streamValue = field.tokenStreamValue();
76 if (streamValue != null)
79 // the field does not have a TokenStream,
80 // so we have to obtain one from the analyzer
81 final Reader reader; // find or make Reader
82 final Reader readerValue = field.readerValue();
84 if (readerValue != null)
87 String stringValue = field.stringValue();
88 if (stringValue == null)
89 throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
90 stringReader.init(stringValue);
91 reader = stringReader;
95 stream = analyzer.reusableTokenStream(field.name(), reader);
98 // reset the TokenStream to the first token
101 while(stream.incrementToken())
104 totalTokenCount += tokenCount;
  /* Simple StringReader that can be reset to a new string;
   * we use this when tokenizing the string value from a
   * Field, so one instance is reused across fields/docs. */
  ReusableStringReader stringReader = new ReusableStringReader();
113 private final static class ReusableStringReader extends Reader {
118 ReusableStringReader() {}
119 void init(String s) {
125 public int read(char[] c) {
126 return read(c, 0, c.length);
129 public int read(char[] c, int off, int len) {
131 s.getChars(upto, upto+len, c, off);
135 } else if (0 == left) {
138 s.getChars(upto, upto+left, c, off);
146 public void close() {}