1 package org.apache.lucene.benchmark.byTask.feeds;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Closeable;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
26 import org.apache.lucene.benchmark.byTask.utils.Config;
27 import org.apache.lucene.benchmark.byTask.utils.Format;
30 * Base class for source of data for benchmarking
32 * Keeps track of various statistics, such as how many data items and bytes were generated.
35 * Supports the following configuration parameters:
37 * <li><b>content.source.forever</b> - specifies whether to generate items
38 * forever (<b>default=true</b>).
39 * <li><b>content.source.verbose</b> - specifies whether messages should be
40 * output by the content source (<b>default=false</b>).
41 * <li><b>content.source.encoding</b> - specifies which encoding to use when
42 * reading the files of that content source. Certain implementations may define
43 * a default value if this parameter is not specified. (<b>default=null</b>).
44 * <li><b>content.source.log.step</b> - specifies for how many items a
45 * message should be logged. If set to 0 it means no logging should occur.
46 * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
47 * logStep is not 0 (<b>default=0</b>).
50 public abstract class ContentItemsSource implements Closeable {
// Bytes generated since the last reset (the value returned by getBytesCount()).
52 private long bytesCount;
// Total bytes generated by this source (the value returned by getTotalBytesCount()).
53 private long totalBytesCount;
// Items generated since the last reset (the value returned by getItemsCount()).
54 private int itemCount;
// Total items generated by this source (the value returned by getTotalItemsCount()).
55 private int totalItemCount;
// Configuration stored by setConfig(Config) and exposed through getConfig().
56 private Config config;
// Total item count recorded by printStatistics the last time it reported that figure.
58 private int lastPrintedNumUniqueTexts = 0;
// Total byte count recorded by printStatistics the last time it reported that figure.
59 private long lastPrintedNumUniqueBytes = 0;
// Number shown in parentheses in the header line built by printStatistics.
60 private int printNum = 0;
// The four settings below are loaded from the Config in setConfig(Config).
// "content.source.forever" (default true).
62 protected boolean forever;
// "content.source.log.step" (default 0); shouldLog() requires it to be positive.
63 protected int logStep;
// "content.source.verbose" (default false); shouldLog() requires it to be true.
64 protected boolean verbose;
// "content.source.encoding" (default null).
65 protected String encoding;
67 /** Adds numBytes to both the since-last-reset and the total byte counters of this source. */
68 protected final synchronized void addBytes(long numBytes) {
// Counter reported by getBytesCount().
69 bytesCount += numBytes;
// Counter reported by getTotalBytesCount().
70 totalBytesCount += numBytes;
73 /** Updates the count of items generated by this source. */
// NOTE(review): presumably increments itemCount and totalItemCount, mirroring addBytes — confirm against the method body.
74 protected final synchronized void addItem() {
80 * A convenience method for collecting all the files of a content source from
81 * a given directory. The collected {@link File} instances are stored in the
82 * given <code>files</code>.
// Recursively walks dir depth-first: sub-directories are descended into, and readable
// non-directory entries are selected (presumably appended to files — confirm in the branch body).
84 protected final void collectFiles(File dir, ArrayList<File> files) {
// NOTE(review): File.listFiles() returns null when dir is not a directory or an I/O error
// occurs, which would make Arrays.sort throw — confirm a guard precedes this line.
89 File[] dirFiles = dir.listFiles();
// Sort so entries are visited in File's natural (pathname) order rather than listFiles() order.
90 Arrays.sort(dirFiles);
91 for (int i = 0; i < dirFiles.length; i++) {
92 File file = dirFiles[i];
93 if (file.isDirectory()) {
// Descend into the sub-directory, accumulating into the same list.
94 collectFiles(file, files);
95 } else if (file.canRead()) {
102 * Returns true if it's time to log a message (depending on verbose and
103 * the number of items generated).
// True only when verbose is set, logStep is positive, and itemCount is a multiple of logStep.
105 protected final boolean shouldLog() {
// The logStep > 0 test is evaluated first, so the modulo never divides by zero.
106 return verbose && logStep > 0 && itemCount % logStep == 0;
109 /** Called when reading from this content source is no longer required. Declared abstract, so each concrete source supplies its own implementation. */
110 public abstract void close() throws IOException;
112 /** Returns the number of bytes generated since last reset. */
113 public final long getBytesCount() { return bytesCount; }
115 /** Returns the number of generated items since last reset. */
116 public final int getItemsCount() { return itemCount; }
118 public final Config getConfig() { return config; }
120 /** Returns the total number of bytes that were generated by this source. */
121 public final long getTotalBytesCount() { return totalBytesCount; }
123 /** Returns the total number of generated items. */
124 public final int getTotalItemsCount() { return totalItemCount; }
127 * Resets the input for this content source, so that the test would behave as
128 * if it was just started, input-wise.
130 * <b>NOTE:</b> the default implementation resets the number of bytes and
131 * items generated since the last reset, so it's important to call
132 * super.resetInputs in case you override this method.
// Per this method's Javadoc, the default implementation resets the since-last-reset byte and
// item counters, so overriding subclasses must call super.resetInputs().
// NOTE(review): confirm against the method body.
134 public void resetInputs() throws IOException {
140 * Sets the {@link Config} for this content source. If you override this
141 * method, you must call super.setConfig.
// Stores the given Config and loads from it the settings held in this class's protected fields.
143 public void setConfig(Config config) {
144 this.config = config;
// The second argument of each lookup matches the default documented in the class Javadoc.
145 forever = config.get("content.source.forever", true);
146 logStep = config.get("content.source.log.step", 0);
147 verbose = config.get("content.source.verbose", false);
148 encoding = config.get("content.source.encoding", null);
// Builds a statistics report for this source and prints it to System.out.
// itemsName is the label used for the generated items in the report text.
151 public void printStatistics(String itemsName) {
// NOTE(review): nothing visible sets or tests print after this initialisation — confirm how it gates the output.
155 boolean print = false;
157 StringBuilder sb = new StringBuilder();
158 String newline = System.getProperty("line.separator");
// Header line: the concrete class's simple name followed by printNum in parentheses.
159 sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
160 int nut = getTotalItemsCount();
// Report the total item count only if it grew since the value last recorded here.
161 if (nut > lastPrintedNumUniqueTexts) {
// NOTE(review): col is passed to Format.format here and below, but its declaration is not visible — confirm its definition.
163 sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline);
164 lastPrintedNumUniqueTexts = nut;
166 long nub = getTotalBytesCount();
// Likewise, report the total byte count only if it grew since the value last recorded here.
167 if (nub > lastPrintedNumUniqueBytes) {
169 sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline);
170 lastPrintedNumUniqueBytes = nub;
// The since-last-reset figures are added whenever at least one item was generated since the last reset.
172 if (getItemsCount() > 0) {
174 sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline);
175 sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
// Emit the accumulated report followed by one extra line separator.
178 System.out.println(sb.append(newline).toString());