+++ /dev/null
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.misc;
-
-import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.index.FieldInvertState;
-
-import java.util.Map;
-import java.util.HashMap;
-
-/**
- * A similarity with a length norm that provides for a "plateau" of
- * equally good lengths, and tf helper functions.
- *
- * <p>
- * For the length norm, a global min/max can be specified to define the
- * plateau of lengths that should all have a norm of 1.0.
- * Below the min, and above the max, the length norm drops off in a
- * sqrt function.
- * </p>
- * <p>
- * A per-field min/max can be specified if different fields have
- * different sweet spots.
- * </p>
- *
- * <p>
- * For tf, baselineTf and hyperbolicTf functions are provided, which
- * subclasses can choose between.
- * </p>
- *
- * @see #computeLengthNorm
- * @see #baselineTf
- * @see #hyperbolicTf
- */
-public class SweetSpotSimilarity extends DefaultSimilarity {
-
-  // Global length-norm factors, used for any field without its own settings.
-  private int ln_min = 1;
-  private int ln_max = 1;
-  private float ln_steep = 0.5f;
-
-  // Per-field length-norm overrides, keyed by field name.
-  private final Map<String,Number> ln_maxs = new HashMap<String,Number>(7);
-  private final Map<String,Number> ln_mins = new HashMap<String,Number>(7);
-  private final Map<String,Float> ln_steeps = new HashMap<String,Float>(7);
-  private final Map<String,Boolean> ln_overlaps = new HashMap<String,Boolean>(7);
-
-  // Factors read by baselineTf.
-  private float tf_base = 0.0f;
-  private float tf_min = 0.0f;
-
-  // Factors read by hyperbolicTf.
-  private float tf_hyper_min = 0.0f;
-  private float tf_hyper_max = 2.0f;
-  private double tf_hyper_base = 1.3d;
-  private float tf_hyper_xoffset = 10.0f;
-
-  public SweetSpotSimilarity() {
-    super();
-  }
-
-  /**
-   * Sets the baseline and minimum function variables for baselineTf.
-   *
-   * @param base the value returned for any non-zero freq that is at or below
-   *        <code>min</code> (default: 0.0)
-   * @param min the freq threshold above which the sqrt curve is used
-   *        (default: 0.0)
-   * @see #baselineTf
-   */
-  public void setBaselineTfFactors(float base, float min) {
-    tf_min = min;
-    tf_base = base;
-  }
-
-  /**
-   * Sets the function variables for the hyperbolicTf functions.
-   *
-   * @param min the minimum tf value to ever be returned (default: 0.0)
-   * @param max the maximum tf value to ever be returned (default: 2.0)
-   * @param base the base value to be used in the exponential for the
-   *        hyperbolic function (default: 1.3)
-   * @param xoffset the midpoint of the hyperbolic function (default: 10.0)
-   * @see #hyperbolicTf
-   */
-  public void setHyperbolicTfFactors(float min, float max,
-                                     double base, float xoffset) {
-    tf_hyper_min = min;
-    tf_hyper_max = max;
-    tf_hyper_base = base;
-    tf_hyper_xoffset = xoffset;
-  }
-
-  /**
-   * Sets the default function variables used by computeLengthNorm when no
-   * field specific variables have been set.
-   *
-   * @param min lower edge of the plateau (default: 1)
-   * @param max upper edge of the plateau (default: 1)
-   * @param steepness steepness of the curve outside the plateau (default: 0.5)
-   * @see #computeLengthNorm
-   */
-  public void setLengthNormFactors(int min, int max, float steepness) {
-    this.ln_min = min;
-    this.ln_max = max;
-    this.ln_steep = steepness;
-  }
-
-  /**
-   * Sets the function variables used by computeLengthNorm for a specific
-   * named field.
-   *
-   * @param field field name
-   * @param min minimum value
-   * @param max maximum value
-   * @param steepness steepness of the curve
-   * @param discountOverlaps if true, <code>numOverlapTokens</code> will be
-   * subtracted from <code>numTokens</code>; if false then
-   * <code>numOverlapTokens</code> will be assumed to be 0 (see
-   * {@link DefaultSimilarity#computeNorm(String, FieldInvertState)} for details).
-   *
-   * @see #computeLengthNorm
-   */
-  public void setLengthNormFactors(String field, int min, int max,
-                                   float steepness, boolean discountOverlaps) {
-    ln_mins.put(field, Integer.valueOf(min));
-    ln_maxs.put(field, Integer.valueOf(max));
-    ln_steeps.put(field, Float.valueOf(steepness));
-    // valueOf reuses the cached TRUE/FALSE instances instead of allocating.
-    ln_overlaps.put(field, Boolean.valueOf(discountOverlaps));
-  }
-
-  /**
-   * Implemented as <code> state.getBoost() *
-   * computeLengthNorm(fieldName, numTokens) </code> where
-   * numTokens does not count overlap tokens if
-   * discountOverlaps is true by default or true for this
-   * specific field.
-   *
-   * @param fieldName name of the field being normalized
-   * @param state supplies the field's length, overlap count and boost
-   * @return the boost multiplied by the length norm
-   */
-  @Override
-  public float computeNorm(String fieldName, FieldInvertState state) {
-    // A per-field setting, when one was registered, wins over the
-    // class-wide discountOverlaps flag.
-    final Boolean fieldOverlaps = ln_overlaps.get(fieldName);
-    final boolean overlaps =
-        (fieldOverlaps != null) ? fieldOverlaps.booleanValue() : discountOverlaps;
-
-    final int numTokens;
-    if (overlaps) {
-      numTokens = state.getLength() - state.getNumOverlap();
-    } else {
-      numTokens = state.getLength();
-    }
-
-    return state.getBoost() * computeLengthNorm(fieldName, numTokens);
-  }
-
-  /**
-   * Implemented as:
-   * <code>
-   * 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 )
-   * </code>.
-   *
-   * <p>
-   * This degrades to <code>1/sqrt(x)</code> when min and max are both 1 and
-   * steepness is 0.5
-   * </p>
-   *
-   * <p>
-   * :TODO: potential optimization is to just flat out return 1.0f if numTerms
-   * is between min and max.
-   * </p>
-   *
-   * @param fieldName field whose factors should be used; the global factors
-   *        apply to whichever of min, max and steepness the field lacks
-   * @param numTerms the length to normalize
-   * @return the length norm for <code>numTerms</code>
-   * @see #setLengthNormFactors
-   */
-  public float computeLengthNorm(String fieldName, int numTerms) {
-    final Number fieldMin = ln_mins.get(fieldName);
-    final Number fieldMax = ln_maxs.get(fieldName);
-    final Float fieldSteep = ln_steeps.get(fieldName);
-
-    final int l = (fieldMin != null) ? fieldMin.intValue() : ln_min;
-    final int h = (fieldMax != null) ? fieldMax.intValue() : ln_max;
-    final float s = (fieldSteep != null) ? fieldSteep.floatValue() : ln_steep;
-
-    // With l <= h this is 0 anywhere on the [l, h] plateau and twice the
-    // distance to the nearer edge outside of it.
-    final int distance =
-        Math.abs(numTerms - l) + Math.abs(numTerms - h) - (h - l);
-
-    return (float) (1.0f / Math.sqrt((s * (float) distance) + 1.0f));
-  }
-
-  /**
-   * Delegates to baselineTf
-   *
-   * @see #baselineTf
-   */
-  @Override
-  public float tf(int freq) {
-    return baselineTf(freq);
-  }
-
-  /**
-   * Implemented as:
-   * <code>
-   * (x &lt;= min) ? base : sqrt(x+(base**2)-min)
-   * </code>
-   * ...but with a special case check for 0.
-   * <p>
-   * This degrades to <code>sqrt(x)</code> when min and base are both 0
-   * </p>
-   *
-   * @param freq the term frequency
-   * @return 0 for a freq of 0, otherwise the baseline tf value
-   * @see #setBaselineTfFactors
-   */
-  public float baselineTf(float freq) {
-    if (0.0f == freq) {
-      return 0.0f;
-    }
-
-    return (freq <= tf_min)
-      ? tf_base
-      : (float) Math.sqrt(freq + (tf_base * tf_base) - tf_min);
-  }
-
-  /**
-   * Uses a hyperbolic tangent function that allows for a hard max...
-   *
-   * <code>
-   * tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1)
-   * </code>
-   *
-   * <p>
-   * This code is provided as a convenience for subclasses that want
-   * to use a hyperbolic tf function.
-   * </p>
-   *
-   * @param freq the term frequency
-   * @return 0 for a freq of 0, otherwise the hyperbolic tf value; the
-   *         configured max whenever the computation yields NaN
-   * @see #setHyperbolicTfFactors
-   */
-  public float hyperbolicTf(float freq) {
-    if (0.0f == freq) {
-      return 0.0f;
-    }
-
-    final float min = tf_hyper_min;
-    final float max = tf_hyper_max;
-    final double base = tf_hyper_base;
-    final float xoffset = tf_hyper_xoffset;
-    final double x = (double) (freq - xoffset);
-
-    // Each power appears twice in the formula; compute it once.
-    final double pos = Math.pow(base, x);
-    final double neg = Math.pow(base, -x);
-
-    final float result = min +
-      (float) (
-        (max - min) / 2.0f
-        *
-        (((pos - neg) / (pos + neg)) + 1.0d)
-      );
-
-    // Once one power overflows to infinity the ratio is inf/inf, i.e. NaN.
-    // That happens far from xoffset in either direction, and max is the
-    // fallback in both cases.
-    return Float.isNaN(result) ? max : result;
-  }
-
-}