1 package org.apache.lucene.benchmark.byTask.tasks;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.lang.reflect.Constructor;
21 import java.util.StringTokenizer;
23 import org.apache.lucene.analysis.Analyzer;
24 import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
25 import org.apache.lucene.analysis.shingle.ShingleFilter;
26 import org.apache.lucene.benchmark.byTask.PerfRunData;
27 import org.apache.lucene.util.Version;
30 * Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper
33 * <li> <code>NewShingleAnalyzer</code> (constructs with all defaults)
34 * <li> <code>NewShingleAnalyzer(analyzer:standard.StandardAnalyzer,maxShingleSize:2,outputUnigrams:true)</code>
38 public class NewShingleAnalyzerTask extends PerfTask {
// Class name of the analyzer to instantiate; set from the "analyzer" task parameter.
// setAnalyzer() prefixes "org.apache.lucene.analysis." when the name contains no '.'
// or starts with "standard.", so this default resolves to the standard analyzer class.
40 private String analyzerClassName = "standard.StandardAnalyzer";
// Set from the "maxShingleSize" task parameter in setParams(); defaults to 2.
41 private int maxShingleSize = 2;
// Set from the "outputUnigrams" task parameter in setParams(); defaults to true.
42 private boolean outputUnigrams = true;
44 public NewShingleAnalyzerTask(PerfRunData runData) {
// Resolves the configured analyzer class name, instantiates that class reflectively,
// builds a ShingleAnalyzerWrapper and installs it on the run data.
// NOTE(review): several lines of this method (the opening of the try block, closing
// braces, and some ShingleAnalyzerWrapper constructor arguments) are not visible in
// this extract; the comments below describe only the visible statements.
48 private void setAnalyzer() throws Exception {
49 Class<? extends Analyzer> clazz = null;
50 Analyzer wrappedAnalyzer;
// A null or empty class name falls back to the fully qualified StandardAnalyzer
// (presumably assigned to analyzerClassName; the assignment target is not visible here).
52 if (analyzerClassName == null || analyzerClassName.equals("")) {
54 = "org.apache.lucene.analysis.standard.StandardAnalyzer";
// Names with no '.' at all, or starting with "standard.", are treated as relative to
// the org.apache.lucene.analysis package; any other dotted name is used verbatim.
56 if (analyzerClassName.indexOf(".") == -1
57 || analyzerClassName.startsWith("standard.")) {
58 //there is no package name, assume o.a.l.analysis
59 analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
// asSubclass() rejects classes that do not extend Analyzer.
61 clazz = Class.forName(analyzerClassName).asSubclass(Analyzer.class);
62 // first try to use a ctor with version parameter (needed for many new
63 // Analyzers that have no default one anymore)
64 Constructor<? extends Analyzer> ctor = clazz.getConstructor(Version.class);
65 wrappedAnalyzer = ctor.newInstance(Version.LUCENE_CURRENT);
// Only a missing Version constructor triggers the fallback; other reflection
// failures are not caught by this handler.
66 } catch (NoSuchMethodException e) {
67 // otherwise use default ctor
68 wrappedAnalyzer = clazz.newInstance();
// Builds the shingle wrapper using ShingleFilter's default minimum shingle size and
// token separator. NOTE(review): the remaining constructor arguments are not visible
// in this extract - confirm against the full file.
71 ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
73 ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
75 ShingleFilter.TOKEN_SEPARATOR,
// Makes the newly built wrapper the analyzer held by the run data.
78 getRunData().setAnalyzer(analyzer);
// Task entry point.
// NOTE(review): the body of the try block, the remainder of the message expression and
// the return statement are not visible in this extract. The visible part shows a
// "Changed Analyzer to: ..." message being built and any Exception being rethrown as a
// RuntimeException that keeps the original exception as its cause.
82 public int doLogic() throws Exception {
86 ("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over"
88 } catch (Exception e) {
// Wrap rather than propagate, preserving the cause for diagnosis.
89 throw new RuntimeException("Error creating Analyzer", e);
95 public void setParams(String params) {
96 super.setParams(params);
97 StringTokenizer st = new StringTokenizer(params, ",");
98 while (st.hasMoreTokens()) {
99 String param = st.nextToken();
100 StringTokenizer expr = new StringTokenizer(param, ":");
101 String key = expr.nextToken();
102 String value = expr.nextToken();
103 if (key.equalsIgnoreCase("analyzer")) {
104 analyzerClassName = value;
105 } else if (key.equalsIgnoreCase("outputUnigrams")) {
106 outputUnigrams = Boolean.parseBoolean(value);
107 } else if (key.equalsIgnoreCase("maxShingleSize")) {
108 maxShingleSize = (int)Double.parseDouble(value);
110 throw new RuntimeException("Unknown parameter " + param);
116 public boolean supportsParams() {