+++ /dev/null
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.io.InputStream;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.zip.GZIPInputStream;
-import java.util.Random;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-/** Minimal port of contrib/benchmark's LneDocSource +
- * DocMaker, so tests can enum docs from a line file created
- * by contrib/benchmark's WriteLineDoc task */
-public class LineFileDocs implements Closeable {
-
- private BufferedReader reader;
- private final static int BUFFER_SIZE = 1 << 16; // 64K
- private final AtomicInteger id = new AtomicInteger();
- private final String path;
-
- /** If forever is true, we rewind the file at EOF (repeat
- * the docs over and over) */
- public LineFileDocs(Random random, String path) throws IOException {
- this.path = path;
- open(random);
- }
-
- public LineFileDocs(Random random) throws IOException {
- this(random, LuceneTestCase.TEST_LINE_DOCS_FILE);
- }
-
- public synchronized void close() throws IOException {
- if (reader != null) {
- reader.close();
- reader = null;
- }
- }
-
- private synchronized void open(Random random) throws IOException {
- InputStream is = getClass().getResourceAsStream(path);
- if (is == null) {
- // if its not in classpath, we load it as absolute filesystem path (e.g. Hudson's home dir)
- is = new FileInputStream(path);
- }
- File file = new File(path);
- long size;
- if (file.exists()) {
- size = file.length();
- } else {
- size = is.available();
- }
- if (path.endsWith(".gz")) {
- is = new GZIPInputStream(is);
- // guestimate:
- size *= 2.8;
- }
-
- reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), BUFFER_SIZE);
-
- // Override sizes for currently "known" line files:
- if (path.equals("europarl.lines.txt.gz")) {
- size = 15129506L;
- } else if (path.equals("/home/hudson/lucene-data/enwiki.random.lines.txt.gz")) {
- size = 3038178822L;
- }
-
- // Randomly seek to starting point:
- if (random != null && size > 3) {
- final long seekTo = (random.nextLong()&Long.MAX_VALUE) % (size/3);
- if (LuceneTestCase.VERBOSE) {
- System.out.println("TEST: LineFileDocs: seek to fp=" + seekTo + " on open");
- }
- reader.skip(seekTo);
- reader.readLine();
- }
- }
-
- public synchronized void reset(Random random) throws IOException {
- close();
- open(random);
- id.set(0);
- }
-
- private final static char SEP = '\t';
-
- private static final class DocState {
- final Document doc;
- final Field titleTokenized;
- final Field title;
- final Field body;
- final Field id;
- final Field date;
-
- public DocState() {
- doc = new Document();
-
- title = new Field("title", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
- doc.add(title);
-
- titleTokenized = new Field("titleTokenized", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
- doc.add(titleTokenized);
-
- body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
- doc.add(body);
-
- id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
- doc.add(id);
-
- date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
- doc.add(date);
- }
- }
-
- private final ThreadLocal<DocState> threadDocs = new ThreadLocal<DocState>();
-
- /** Note: Document instance is re-used per-thread */
- public Document nextDoc() throws IOException {
- String line;
- synchronized(this) {
- line = reader.readLine();
- if (line == null) {
- // Always rewind at end:
- if (LuceneTestCase.VERBOSE) {
- System.out.println("TEST: LineFileDocs: now rewind file...");
- }
- close();
- open(null);
- line = reader.readLine();
- }
- }
-
- DocState docState = threadDocs.get();
- if (docState == null) {
- docState = new DocState();
- threadDocs.set(docState);
- }
-
- int spot = line.indexOf(SEP);
- if (spot == -1) {
- throw new RuntimeException("line: [" + line + "] is in an invalid format !");
- }
- int spot2 = line.indexOf(SEP, 1 + spot);
- if (spot2 == -1) {
- throw new RuntimeException("line: [" + line + "] is in an invalid format !");
- }
-
- docState.body.setValue(line.substring(1+spot2, line.length()));
- final String title = line.substring(0, spot);
- docState.title.setValue(title);
- docState.titleTokenized.setValue(title);
- docState.date.setValue(line.substring(1+spot, spot2));
- docState.id.setValue(Integer.toString(id.getAndIncrement()));
- return docState.doc;
- }
-}