+++ /dev/null
-package org.apache.lucene.search.suggest;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.net.URL;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import java.util.Random;
-import java.util.concurrent.Callable;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.fst.FSTLookup;
-import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
-import org.apache.lucene.search.suggest.tst.TSTLookup;
-
-import org.junit.BeforeClass;
-import org.junit.Ignore;
-
-/**
- * Benchmarks tests for implementations of {@link Lookup} interface.
- */
-@Ignore("COMMENT ME TO RUN BENCHMARKS!")
-public class LookupBenchmarkTest extends LuceneTestCase {
- @SuppressWarnings("unchecked")
- private final List<Class<? extends Lookup>> benchmarkClasses = Arrays.asList(
- JaspellLookup.class,
- TSTLookup.class,
- FSTLookup.class);
-
- private final static int rounds = 15;
- private final static int warmup = 5;
-
- private final int num = 7;
- private final boolean onlyMorePopular = true;
-
- private final static Random random = new Random(0xdeadbeef);
-
- /**
- * Input term/weight pairs.
- */
- private static TermFreq [] dictionaryInput;
-
- /**
- * Benchmark term/weight pairs (randomized order).
- */
- private static List<TermFreq> benchmarkInput;
-
- /**
- * Loads terms and frequencies from Wikipedia (cached).
- */
- @BeforeClass
- public static void setup() throws Exception {
- List<TermFreq> input = readTop50KWiki();
- Collections.shuffle(input, random);
- LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
- Collections.shuffle(input, random);
- LookupBenchmarkTest.benchmarkInput = input;
- }
-
- static final Charset UTF_8 = Charset.forName("UTF-8");
-
- /**
- * Collect the multilingual input for benchmarks/ tests.
- */
- public static List<TermFreq> readTop50KWiki() throws Exception {
- List<TermFreq> input = new ArrayList<TermFreq>();
- URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
- assert resource != null : "Resource missing: Top50KWiki.utf8";
-
- String line = null;
- BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
- while ((line = br.readLine()) != null) {
- int tab = line.indexOf('|');
- assertTrue("No | separator?: " + line, tab >= 0);
- float weight = Float.parseFloat(line.substring(tab + 1));
- String key = line.substring(0, tab);
- input.add(new TermFreq(key, weight));
- }
- br.close();
- return input;
- }
-
- /**
- * Test construction time.
- */
- public void testConstructionTime() throws Exception {
- System.err.println("-- construction time");
- for (final Class<? extends Lookup> cls : benchmarkClasses) {
- BenchmarkResult result = measure(new Callable<Integer>() {
- public Integer call() throws Exception {
- final Lookup lookup = buildLookup(cls, dictionaryInput);
- return lookup.hashCode();
- }
- });
-
- System.err.println(
- String.format(Locale.ENGLISH, "%-15s input: %d, time[ms]: %s",
- cls.getSimpleName(),
- dictionaryInput.length,
- result.average.toString()));
- }
- }
-
- /**
- * Test memory required for the storage.
- */
- public void testStorageNeeds() throws Exception {
- System.err.println("-- RAM consumption");
- final RamUsageEstimator rue = new RamUsageEstimator();
- for (Class<? extends Lookup> cls : benchmarkClasses) {
- Lookup lookup = buildLookup(cls, dictionaryInput);
- System.err.println(
- String.format(Locale.ENGLISH, "%-15s size[B]:%,13d",
- lookup.getClass().getSimpleName(),
- rue.estimateRamUsage(lookup)));
- }
- }
-
- /**
- * Create {@link Lookup} instance and populate it.
- */
- private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
- Lookup lookup = cls.newInstance();
- lookup.build(new TermFreqArrayIterator(input));
- return lookup;
- }
-
- /**
- * Test performance of lookup on full hits.
- */
- public void testPerformanceOnFullHits() throws Exception {
- final int minPrefixLen = 100;
- final int maxPrefixLen = 200;
- runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
- }
-
- /**
- * Test performance of lookup on longer term prefixes (6-9 letters or shorter).
- */
- public void testPerformanceOnPrefixes6_9() throws Exception {
- final int minPrefixLen = 6;
- final int maxPrefixLen = 9;
- runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
- }
-
- /**
- * Test performance of lookup on short term prefixes (2-4 letters or shorter).
- */
- public void testPerformanceOnPrefixes2_4() throws Exception {
- final int minPrefixLen = 2;
- final int maxPrefixLen = 4;
- runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
- }
-
- /**
- * Run the actual benchmark.
- */
- public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen,
- final int num, final boolean onlyMorePopular) throws Exception {
- System.err.println(String.format(Locale.ENGLISH,
- "-- prefixes: %d-%d, num: %d, onlyMorePopular: %s",
- minPrefixLen, maxPrefixLen, num, onlyMorePopular));
-
- for (Class<? extends Lookup> cls : benchmarkClasses) {
- final Lookup lookup = buildLookup(cls, dictionaryInput);
-
- final List<String> input = new ArrayList<String>(benchmarkInput.size());
- for (TermFreq tf : benchmarkInput) {
- input.add(tf.term.substring(0, Math.min(tf.term.length(),
- minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
- }
-
- BenchmarkResult result = measure(new Callable<Integer>() {
- public Integer call() throws Exception {
- int v = 0;
- for (String term : input) {
- v += lookup.lookup(term, onlyMorePopular, num).size();
- }
- return v;
- }
- });
-
- System.err.println(
- String.format(Locale.ENGLISH, "%-15s queries: %d, time[ms]: %s, ~qps: %.0f",
- lookup.getClass().getSimpleName(),
- input.size(),
- result.average.toString(),
- input.size() / result.average.avg));
- }
- }
-
- /**
- * Do the measurements.
- */
- private BenchmarkResult measure(Callable<Integer> callable) {
- final double NANOS_PER_MS = 1000000;
-
- try {
- List<Double> times = new ArrayList<Double>();
- for (int i = 0; i < warmup + rounds; i++) {
- final long start = System.nanoTime();
- guard = callable.call().intValue();
- times.add((System.nanoTime() - start) / NANOS_PER_MS);
- }
- return new BenchmarkResult(times, warmup, rounds);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- /** Guard against opts. */
- @SuppressWarnings("unused")
- private static volatile int guard;
-
- private static class BenchmarkResult {
- /** Average time per round (ms). */
- public final Average average;
-
- public BenchmarkResult(List<Double> times, int warmup, int rounds) {
- this.average = Average.from(times.subList(warmup, times.size()));
- }
- }
-}
\ No newline at end of file