1 package org.apache.lucene.search.suggest;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.BufferedReader;
21 import java.io.InputStreamReader;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collections;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Random;
30 import java.util.concurrent.Callable;
32 import org.apache.lucene.util.LuceneTestCase;
33 import org.apache.lucene.util.RamUsageEstimator;
34 import org.apache.lucene.search.suggest.Lookup;
35 import org.apache.lucene.search.suggest.fst.FSTLookup;
36 import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
37 import org.apache.lucene.search.suggest.tst.TSTLookup;
39 import org.junit.BeforeClass;
40 import org.junit.Ignore;
43 * Benchmarks tests for implementations of {@link Lookup} interface.
45 @Ignore("COMMENT ME TO RUN BENCHMARKS!")
46 public class LookupBenchmarkTest extends LuceneTestCase {
47 @SuppressWarnings("unchecked")
48 private final List<Class<? extends Lookup>> benchmarkClasses = Arrays.asList(
53 private final static int rounds = 15;
54 private final static int warmup = 5;
56 private final int num = 7;
57 private final boolean onlyMorePopular = true;
59 private final static Random random = new Random(0xdeadbeef);
62 * Input term/weight pairs.
64 private static TermFreq [] dictionaryInput;
67 * Benchmark term/weight pairs (randomized order).
69 private static List<TermFreq> benchmarkInput;
72 * Loads terms and frequencies from Wikipedia (cached).
75 public static void setup() throws Exception {
76 List<TermFreq> input = readTop50KWiki();
77 Collections.shuffle(input, random);
78 LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]);
79 Collections.shuffle(input, random);
80 LookupBenchmarkTest.benchmarkInput = input;
83 static final Charset UTF_8 = Charset.forName("UTF-8");
86 * Collect the multilingual input for benchmarks/ tests.
88 public static List<TermFreq> readTop50KWiki() throws Exception {
89 List<TermFreq> input = new ArrayList<TermFreq>();
90 URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
91 assert resource != null : "Resource missing: Top50KWiki.utf8";
94 BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
95 while ((line = br.readLine()) != null) {
96 int tab = line.indexOf('|');
97 assertTrue("No | separator?: " + line, tab >= 0);
98 float weight = Float.parseFloat(line.substring(tab + 1));
99 String key = line.substring(0, tab);
100 input.add(new TermFreq(key, weight));
107 * Test construction time.
109 public void testConstructionTime() throws Exception {
110 System.err.println("-- construction time");
111 for (final Class<? extends Lookup> cls : benchmarkClasses) {
112 BenchmarkResult result = measure(new Callable<Integer>() {
113 public Integer call() throws Exception {
114 final Lookup lookup = buildLookup(cls, dictionaryInput);
115 return lookup.hashCode();
120 String.format(Locale.ENGLISH, "%-15s input: %d, time[ms]: %s",
122 dictionaryInput.length,
123 result.average.toString()));
128 * Test memory required for the storage.
130 public void testStorageNeeds() throws Exception {
131 System.err.println("-- RAM consumption");
132 final RamUsageEstimator rue = new RamUsageEstimator();
133 for (Class<? extends Lookup> cls : benchmarkClasses) {
134 Lookup lookup = buildLookup(cls, dictionaryInput);
136 String.format(Locale.ENGLISH, "%-15s size[B]:%,13d",
137 lookup.getClass().getSimpleName(),
138 rue.estimateRamUsage(lookup)));
143 * Create {@link Lookup} instance and populate it.
145 private Lookup buildLookup(Class<? extends Lookup> cls, TermFreq[] input) throws Exception {
146 Lookup lookup = cls.newInstance();
147 lookup.build(new TermFreqArrayIterator(input));
152 * Test performance of lookup on full hits.
154 public void testPerformanceOnFullHits() throws Exception {
155 final int minPrefixLen = 100;
156 final int maxPrefixLen = 200;
157 runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
161 * Test performance of lookup on longer term prefixes (6-9 letters or shorter).
163 public void testPerformanceOnPrefixes6_9() throws Exception {
164 final int minPrefixLen = 6;
165 final int maxPrefixLen = 9;
166 runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
170 * Test performance of lookup on short term prefixes (2-4 letters or shorter).
172 public void testPerformanceOnPrefixes2_4() throws Exception {
173 final int minPrefixLen = 2;
174 final int maxPrefixLen = 4;
175 runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
179 * Run the actual benchmark.
181 public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen,
182 final int num, final boolean onlyMorePopular) throws Exception {
183 System.err.println(String.format(Locale.ENGLISH,
184 "-- prefixes: %d-%d, num: %d, onlyMorePopular: %s",
185 minPrefixLen, maxPrefixLen, num, onlyMorePopular));
187 for (Class<? extends Lookup> cls : benchmarkClasses) {
188 final Lookup lookup = buildLookup(cls, dictionaryInput);
190 final List<String> input = new ArrayList<String>(benchmarkInput.size());
191 for (TermFreq tf : benchmarkInput) {
192 input.add(tf.term.substring(0, Math.min(tf.term.length(),
193 minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
196 BenchmarkResult result = measure(new Callable<Integer>() {
197 public Integer call() throws Exception {
199 for (String term : input) {
200 v += lookup.lookup(term, onlyMorePopular, num).size();
207 String.format(Locale.ENGLISH, "%-15s queries: %d, time[ms]: %s, ~qps: %.0f",
208 lookup.getClass().getSimpleName(),
210 result.average.toString(),
211 input.size() / result.average.avg));
216 * Do the measurements.
218 private BenchmarkResult measure(Callable<Integer> callable) {
219 final double NANOS_PER_MS = 1000000;
222 List<Double> times = new ArrayList<Double>();
223 for (int i = 0; i < warmup + rounds; i++) {
224 final long start = System.nanoTime();
225 guard = callable.call().intValue();
226 times.add((System.nanoTime() - start) / NANOS_PER_MS);
228 return new BenchmarkResult(times, warmup, rounds);
229 } catch (Exception e) {
230 throw new RuntimeException(e);
234 /** Guard against opts. */
235 @SuppressWarnings("unused")
236 private static volatile int guard;
238 private static class BenchmarkResult {
239 /** Average time per round (ms). */
240 public final Average average;
242 public BenchmarkResult(List<Double> times, int warmup, int rounds) {
243 this.average = Average.from(times.subList(warmup, times.size()));