--- /dev/null
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.InputStream;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.zip.GZIPInputStream;
+import java.util.Random;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/** Minimal port of contrib/benchmark's LineDocSource +
+ * DocMaker, so tests can enum docs from a line file created
+ * by contrib/benchmark's WriteLineDoc task.
+ *
+ * <p>Each line must hold at least three tab-separated columns,
+ * read as title, date and body (the body is everything after
+ * the second tab).  When the end of the file is reached the
+ * file is re-opened and reading continues from the start, so
+ * {@link #nextDoc} never runs out of documents unless the file
+ * is empty.
+ *
+ * <p>Access to the underlying reader is synchronized on this
+ * instance, and every calling thread gets its own re-used
+ * {@link Document} instance. */
+public class LineFileDocs implements Closeable {
+
+  private BufferedReader reader;
+  private final static int BUFFER_SIZE = 1 << 16;    // 64K
+  private final AtomicInteger id = new AtomicInteger();
+  private final String path;
+
+  /** Opens the line file at <code>path</code>, looked up first
+   *  as a classpath resource and then as a filesystem path.
+   *  If <code>random</code> is non-null, reading starts at a
+   *  randomly chosen line within (roughly) the first third of
+   *  the file; otherwise it starts at the first line.
+   *
+   *  @param random source of the random start offset, or null
+   *  @param path classpath resource name or filesystem path;
+   *         a name ending in ".gz" is decompressed on the fly
+   *  @throws IOException if the file cannot be opened or read */
+  public LineFileDocs(Random random, String path) throws IOException {
+    this.path = path;
+    open(random);
+  }
+
+  /** Same as {@link #LineFileDocs(Random,String)}, using the
+   *  path given by <code>LuceneTestCase.TEST_LINE_DOCS_FILE</code>. */
+  public LineFileDocs(Random random) throws IOException {
+    this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
+  }
+
+  /** Closes the underlying reader, if it is open.  Calling
+   *  this more than once is harmless. */
+  public synchronized void close() throws IOException {
+    if (reader != null) {
+      reader.close();
+      reader = null;
+    }
+  }
+
+  /** Opens {@link #path} and positions {@link #reader} at the
+   *  start of a line.  If anything fails after the stream was
+   *  obtained, the stream is closed again so that a failed
+   *  open (e.g. from the constructor) does not leak it. */
+  private synchronized void open(Random random) throws IOException {
+    InputStream is = getClass().getResourceAsStream(path);
+    if (is == null) {
+      // if its not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir)
+      is = new FileInputStream(path);
+    }
+
+    boolean success = false;
+    try {
+      // Estimate the (uncompressed) size; only used to pick a
+      // random starting point below, so it need not be exact:
+      File file = new File(path);
+      long size;
+      if (file.exists()) {
+        size = file.length();
+      } else {
+        size = is.available();
+      }
+      if (path.endsWith(".gz")) {
+        // If this throws (e.g. not actually gzip'd), "is" still
+        // refers to the raw stream and is closed in finally:
+        is = new GZIPInputStream(is);
+        // guestimate:
+        size *= 2.8;
+      }
+
+      reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
+
+      // Override sizes for currently "known" line files:
+      if (path.equals("europarl.lines.txt.gz")) {
+        size = 15129506L;
+      } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) {
+        size = 3038178822L;
+      }
+
+      // Randomly seek to starting point:
+      if (random != null && size > 3) {
+        // NOTE: size is a byte estimate while skip() counts
+        // chars, so the resulting position is only approximate.
+        final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3);
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
+        }
+        reader.skip(seekTo);
+        // We most likely landed mid-line; discard the remainder
+        // so the next read starts on a line boundary:
+        reader.readLine();
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        try {
+          if (reader != null) {
+            // closing the reader also closes the wrapped stream
+            reader.close();
+          } else {
+            is.close();
+          }
+        } catch (IOException ignored) {
+          // keep the original exception; it is the one worth reporting
+        }
+        reader = null;
+      }
+    }
+  }
+
+  /** Closes and re-opens the file (seeking to a random start
+   *  if <code>random</code> is non-null) and restarts the docid
+   *  counter at 0. */
+  public synchronized void reset(Random random) throws IOException {
+    close();
+    open(random);
+    id.set(0);
+  }
+
+  private final static char SEP = '\t';
+
+  /** Holds one re-usable Document together with the Field
+   *  instances added to it, so that {@link #nextDoc} only has
+   *  to update the field values. */
+  private static final class DocState {
+    final Document doc;
+    final Field titleTokenized;
+    final Field title;
+    final Field body;
+    final Field id;
+    final Field date;
+
+    public DocState() {
+      doc = new Document();
+
+      title = new Field("title", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
+      doc.add(title);
+
+      titleTokenized = new Field("titleTokenized", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+      doc.add(titleTokenized);
+
+      body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+      doc.add(body);
+
+      id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+      doc.add(id);
+
+      date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+      doc.add(date);
+    }
+  }
+
+  private final ThreadLocal<DocState> threadDocs = new ThreadLocal<DocState>();
+
+  /** Reads the next line and returns it as a Document with
+   *  "title", "titleTokenized", "body", "date" and "docid"
+   *  fields; "docid" is a counter incremented on every call.
+   *  At end of file the file is re-opened from the start; the
+   *  docid counter is not reset by that.
+   *
+   *  <p>Note: Document instance is re-used per-thread
+   *
+   *  @throws IOException if reading or re-opening the file fails
+   *  @throws RuntimeException if the line does not contain two
+   *          tab separators, or if the file contains no lines */
+  public Document nextDoc() throws IOException {
+    String line;
+    synchronized(this) {
+      line = reader.readLine();
+      if (line == null) {
+        // Always rewind at end:
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println("TEST: LineFileDocs: now rewind file...");
+        }
+        close();
+        open(null);
+        line = reader.readLine();
+        if (line == null) {
+          // Still nothing right after a rewind: the file is
+          // empty.  Fail clearly instead of with an NPE below.
+          throw new RuntimeException("line file: [" + path + "] contains no docs !");
+        }
+      }
+    }
+
+    DocState docState = threadDocs.get();
+    if (docState == null) {
+      docState = new DocState();
+      threadDocs.set(docState);
+    }
+
+    // Line layout: title TAB date TAB body
+    int spot = line.indexOf(SEP);
+    if (spot == -1) {
+      throw new RuntimeException("line: [" + line + "] is in an invalid format !");
+    }
+    int spot2 = line.indexOf(SEP, 1 + spot);
+    if (spot2 == -1) {
+      throw new RuntimeException("line: [" + line + "] is in an invalid format !");
+    }
+
+    docState.body.setValue(line.substring(1+spot2, line.length()));
+    final String title = line.substring(0, spot);
+    docState.title.setValue(title);
+    docState.titleTokenized.setValue(title);
+    docState.date.setValue(line.substring(1+spot, spot2));
+    docState.id.setValue(Integer.toString(id.getAndIncrement()));
+    return docState.doc;
+  }
+}