X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java b/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java new file mode 100644 index 0000000..8b62bfe --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewShingleAnalyzerTask.java @@ -0,0 +1,119 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Constructor; +import java.util.StringTokenizer; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper; +import org.apache.lucene.analysis.shingle.ShingleFilter; +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.util.Version; + +/** + * Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper + *

+ *

+ *

+ */ +public class NewShingleAnalyzerTask extends PerfTask { + + private String analyzerClassName = "standard.StandardAnalyzer"; + private int maxShingleSize = 2; + private boolean outputUnigrams = true; + + public NewShingleAnalyzerTask(PerfRunData runData) { + super(runData); + } + + private void setAnalyzer() throws Exception { + Class clazz = null; + Analyzer wrappedAnalyzer; + try { + if (analyzerClassName == null || analyzerClassName.equals("")) { + analyzerClassName + = "org.apache.lucene.analysis.standard.StandardAnalyzer"; + } + if (analyzerClassName.indexOf(".") == -1 + || analyzerClassName.startsWith("standard.")) { + //there is no package name, assume o.a.l.analysis + analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName; + } + clazz = Class.forName(analyzerClassName).asSubclass(Analyzer.class); + // first try to use a ctor with version parameter (needed for many new + // Analyzers that have no default one anymore) + Constructor ctor = clazz.getConstructor(Version.class); + wrappedAnalyzer = ctor.newInstance(Version.LUCENE_CURRENT); + } catch (NoSuchMethodException e) { + // otherwise use default ctor + wrappedAnalyzer = clazz.newInstance(); + } + + ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper( + wrappedAnalyzer, + ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, + maxShingleSize, + ShingleFilter.TOKEN_SEPARATOR, + outputUnigrams, + false); + getRunData().setAnalyzer(analyzer); + } + + @Override + public int doLogic() throws Exception { + try { + setAnalyzer(); + System.out.println + ("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over" + + analyzerClassName); + } catch (Exception e) { + throw new RuntimeException("Error creating Analyzer", e); + } + return 1; + } + + @Override + public void setParams(String params) { + super.setParams(params); + StringTokenizer st = new StringTokenizer(params, ","); + while (st.hasMoreTokens()) { + String param = st.nextToken(); + StringTokenizer expr = new StringTokenizer(param, ":"); + String key = expr.nextToken(); + String value = expr.nextToken(); + if (key.equalsIgnoreCase("analyzer")) { + analyzerClassName = value; + } else if (key.equalsIgnoreCase("outputUnigrams")) { + outputUnigrams = Boolean.parseBoolean(value); + } else if (key.equalsIgnoreCase("maxShingleSize")) { + maxShingleSize = (int)Double.parseDouble(value); + } else { + throw new RuntimeException("Unknown parameter " + param); + } + } + } + + @Override + public boolean supportsParams() { + return true; + } +}