+++ /dev/null
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-/**
- * Represents content from a specified source, such as TREC, Reuters etc. A
- * {@link ContentSource} is responsible for creating {@link DocData} objects for
- * its documents to be consumed by {@link DocMaker}. It also keeps track
- * of various statistics, such as how many documents were generated, size in
- * bytes etc.
- * <p>
- * Supports the following configuration parameters:
- * <ul>
- * <li><b>content.source.forever</b> - specifies whether to generate documents
- * forever (<b>default=true</b>).
- * <li><b>content.source.verbose</b> - specifies whether messages should be
- * output by the content source (<b>default=false</b>).
- * <li><b>content.source.encoding</b> - specifies which encoding to use when
- * reading the files of that content source. Certain implementations may define
- * a default value if this parameter is not specified. (<b>default=null</b>).
- * <li><b>content.source.log.step</b> - specifies for how many documents a
- * message should be logged. If set to 0 it means no logging should occur.
- * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
- * logStep is not 0 (<b>default=0</b>).
- * </ul>
- */
-public abstract class ContentSource {
-
-  // Counters covering the period since the last call to resetInputs().
-  private long bytesCount;
-  private int docsCount;
-
-  // Running totals; nothing in this class ever resets them.
-  private long totalBytesCount;
-  private int totalDocsCount;
-
-  // The configuration most recently passed to setConfig(Config).
-  private Config config;
-
-  // Settings read from the configuration in setConfig(Config).
-  protected boolean forever;
-  protected int logStep;
-  protected boolean verbose;
-  protected String encoding;
-
-  // NOTE(review): addBytes/addDoc are synchronized, but the getters, shouldLog()
-  // and resetInputs() access the same counters without holding the monitor.
-  // Confirm whether readers can run concurrently with writers.
-
-  /**
-   * Updates the count of bytes generated by this source. Both the
-   * since-last-reset counter and the running total are increased.
-   *
-   * @param numBytes the number of bytes to add
-   */
-  protected final synchronized void addBytes(long numBytes) {
-    bytesCount += numBytes;
-    totalBytesCount += numBytes;
-  }
-
-  /**
-   * Updates the count of documents generated by this source. Both the
-   * since-last-reset counter and the running total are increased by one.
-   */
-  protected final synchronized void addDoc() {
-    ++docsCount;
-    ++totalDocsCount;
-  }
-
-  /**
-   * A convenience method for collecting all the files of a content source from
-   * a given directory. Sub-directories are descended into recursively, and the
-   * entries of each directory are visited in sorted order. The collected
-   * readable {@link File} instances are appended to the given
-   * <code>files</code>.
-   * <p>
-   * A <code>dir</code> that cannot be read, or whose contents cannot be listed
-   * (for example because it is not a directory), is skipped silently.
-   *
-   * @param dir the directory to scan
-   * @param files the list that receives the collected files
-   */
-  protected final void collectFiles(File dir, ArrayList<File> files) {
-    if (!dir.canRead()) {
-      return;
-    }
-
-    // listFiles() returns null rather than an empty array when dir is not a
-    // directory or an I/O error occurs; sorting that would throw an NPE.
-    File[] dirFiles = dir.listFiles();
-    if (dirFiles == null) {
-      return;
-    }
-
-    Arrays.sort(dirFiles);
-    for (File file : dirFiles) {
-      if (file.isDirectory()) {
-        collectFiles(file, files);
-      } else if (file.canRead()) {
-        files.add(file);
-      }
-    }
-  }
-
-  /**
-   * Returns true if it is time to log a message: verbose must be on, logStep
-   * must be positive, and the number of documents generated since the last
-   * reset must be a multiple of logStep.
-   */
-  protected final boolean shouldLog() {
-    return verbose && logStep > 0 && docsCount % logStep == 0;
-  }
-
-  /** Called when reading from this content source is no longer required. */
-  public abstract void close() throws IOException;
-
-  /** Returns the number of bytes generated since last reset. */
-  public final long getBytesCount() { return bytesCount; }
-
-  /** Returns the number of generated documents since last reset. */
-  public final int getDocsCount() { return docsCount; }
-
-  /** Returns the {@link Config} last given to {@link #setConfig(Config)}. */
-  public final Config getConfig() { return config; }
-
-  /** Returns the next {@link DocData} from the content source. */
-  public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
-
-  /** Returns the total number of bytes that were generated by this source. */
-  public final long getTotalBytesCount() { return totalBytesCount; }
-
-  /** Returns the total number of generated documents. */
-  public final int getTotalDocsCount() { return totalDocsCount; }
-
-  /**
-   * Resets the input for this content source, so that the test would behave as
-   * if it was just started, input-wise.
-   * <p>
-   * <b>NOTE:</b> the default implementation resets the number of bytes and
-   * documents generated since the last reset (the totals are left untouched),
-   * so it's important to call super.resetInputs in case you override this
-   * method.
-   */
-  public void resetInputs() throws IOException {
-    bytesCount = 0;
-    docsCount = 0;
-  }
-
-  /**
-   * Sets the {@link Config} for this content source and reads the
-   * <code>content.source.forever</code>, <code>content.source.log.step</code>,
-   * <code>content.source.verbose</code> and
-   * <code>content.source.encoding</code> settings from it. If you override
-   * this method, you must call super.setConfig.
-   *
-   * @param config the configuration to store and read settings from
-   */
-  public void setConfig(Config config) {
-    this.config = config;
-    forever = config.get("content.source.forever", true);
-    logStep = config.get("content.source.log.step", 0);
-    verbose = config.get("content.source.verbose", false);
-    encoding = config.get("content.source.encoding", null);
-  }
-
-}