X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java diff --git a/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java b/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java deleted file mode 100644 index cda2f07..0000000 --- a/lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/misc/SweetSpotSimilarity.java +++ /dev/null @@ -1,268 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.misc; - -import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.index.FieldInvertState; - -import java.util.Map; -import java.util.HashMap; - -/** - * A similarity with a lengthNorm that provides for a "plateau" of - * equally good lengths, and tf helper functions. - * - *

- * For lengthNorm, a global min/max can be specified to define the - * plateau of lengths that should all have a norm of 1.0. - * Below the min and above the max, the lengthNorm drops off in a - * sqrt function. - *

- *

- * A per field min/max can be specified if different fields have - * different sweet spots. - *

- * - *

- * For tf, baselineTf and hyperbolicTf functions are provided, which - * subclasses can choose between. - *

- * - */ -public class SweetSpotSimilarity extends DefaultSimilarity { - - private int ln_min = 1; - private int ln_max = 1; - private float ln_steep = 0.5f; - - private Map ln_maxs = new HashMap(7); - private Map ln_mins = new HashMap(7); - private Map ln_steeps = new HashMap(7); - private Map ln_overlaps = new HashMap(7); - - private float tf_base = 0.0f; - private float tf_min = 0.0f; - - private float tf_hyper_min = 0.0f; - private float tf_hyper_max = 2.0f; - private double tf_hyper_base = 1.3d; - private float tf_hyper_xoffset = 10.0f; - - public SweetSpotSimilarity() { - super(); - } - - /** - * Sets the baseline and minimum function variables for baselineTf - * - * @see #baselineTf - */ - public void setBaselineTfFactors(float base, float min) { - tf_min = min; - tf_base = base; - } - - /** - * Sets the function variables for the hyperbolicTf functions - * - * @param min the minimum tf value to ever be returned (default: 0.0) - * @param max the maximum tf value to ever be returned (default: 2.0) - * @param base the base value to be used in the exponential for the hyperbolic function (default: e) - * @param xoffset the midpoint of the hyperbolic function (default: 10.0) - * @see #hyperbolicTf - */ - public void setHyperbolicTfFactors(float min, float max, - double base, float xoffset) { - tf_hyper_min = min; - tf_hyper_max = max; - tf_hyper_base = base; - tf_hyper_xoffset = xoffset; - } - - /** - * Sets the default function variables used by lengthNorm when no field - * specific variables have been set. - * - * @see #lengthNorm - */ - public void setLengthNormFactors(int min, int max, float steepness) { - this.ln_min = min; - this.ln_max = max; - this.ln_steep = steepness; - } - - /** - * Sets the function variables used by lengthNorm for a specific named field. 
- * - * @param field field name - * @param min minimum value - * @param max maximum value - * @param steepness steepness of the curve - * @param discountOverlaps if true, numOverlapTokens will be - * subtracted from numTokens; if false then - * numOverlapTokens will be assumed to be 0 (see - * {@link DefaultSimilarity#computeNorm(String, FieldInvertState)} for details). - * - * @see #lengthNorm - */ - public void setLengthNormFactors(String field, int min, int max, - float steepness, boolean discountOverlaps) { - ln_mins.put(field, Integer.valueOf(min)); - ln_maxs.put(field, Integer.valueOf(max)); - ln_steeps.put(field, Float.valueOf(steepness)); - ln_overlaps.put(field, new Boolean(discountOverlaps)); - } - - /** - * Implemented as state.getBoost() * - * lengthNorm(fieldName, numTokens) where - * numTokens does not count overlap tokens if - * discountOverlaps is true by default or true for this - * specific field. */ - @Override - public float computeNorm(String fieldName, FieldInvertState state) { - final int numTokens; - boolean overlaps = discountOverlaps; - if (ln_overlaps.containsKey(fieldName)) { - overlaps = ln_overlaps.get(fieldName).booleanValue(); - } - if (overlaps) - numTokens = state.getLength() - state.getNumOverlap(); - else - numTokens = state.getLength(); - - return state.getBoost() * computeLengthNorm(fieldName, numTokens); - } - - /** - * Implemented as: - * - * 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 ) - * . - * - *

- * This degrades to 1/sqrt(x) when min and max are both 1 and - * steepness is 0.5 - *

- * - *

- * :TODO: potential optimization is to just flat out return 1.0f if numTerms - * is between min and max. - *

- * - * @see #setLengthNormFactors - */ - public float computeLengthNorm(String fieldName, int numTerms) { - int l = ln_min; - int h = ln_max; - float s = ln_steep; - - if (ln_mins.containsKey(fieldName)) { - l = ln_mins.get(fieldName).intValue(); - } - if (ln_maxs.containsKey(fieldName)) { - h = ln_maxs.get(fieldName).intValue(); - } - if (ln_steeps.containsKey(fieldName)) { - s = ln_steeps.get(fieldName).floatValue(); - } - - return (float) - (1.0f / - Math.sqrt - ( - ( - s * - (float)(Math.abs(numTerms - l) + Math.abs(numTerms - h) - (h-l)) - ) - + 1.0f - ) - ); - } - - /** - * Delegates to baselineTf - * - * @see #baselineTf - */ - @Override - public float tf(int freq) { - return baselineTf(freq); - } - - /** - * Implemented as: - * - * (x <= min) ? base : sqrt(x+(base**2)-min) - * - * ...but with a special case check for 0. - *

- * This degrates to sqrt(x) when min and base are both 0 - *

- * - * @see #setBaselineTfFactors - */ - public float baselineTf(float freq) { - - if (0.0f == freq) return 0.0f; - - return (freq <= tf_min) - ? tf_base - : (float)Math.sqrt(freq + (tf_base * tf_base) - tf_min); - } - - /** - * Uses a hyperbolic tangent function that allows for a hard max... - * - * - * tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1) - * - * - *

- * This code is provided as a convenience for subclasses that want - * to use a hyperbolic tf function. - *

- * - * @see #setHyperbolicTfFactors - */ - public float hyperbolicTf(float freq) { - if (0.0f == freq) return 0.0f; - - final float min = tf_hyper_min; - final float max = tf_hyper_max; - final double base = tf_hyper_base; - final float xoffset = tf_hyper_xoffset; - final double x = (double)(freq - xoffset); - - final float result = min + - (float)( - (max-min) / 2.0f - * - ( - ( ( Math.pow(base,x) - Math.pow(base,-x) ) - / ( Math.pow(base,x) + Math.pow(base,-x) ) - ) - + 1.0d - ) - ); - - return Float.isNaN(result) ? max : result; - - } - -}