1 package org.apache.lucene.facet.search.sampling;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * Parameters for sampling, dictating whether sampling is to take place and how.
23 * @lucene.experimental
25 public class SamplingParams {
28 * Default factor by which more results are requested over the sample set.
29 * @see SamplingParams#getOversampleFactor()
31 public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
34 * Default ratio of the sample size to the original size of the document set.
35 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
37 public static final double DEFAULT_SAMPLE_RATIO = 0.01;
40 * Default maximum size of sample.
41 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
43 public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;
46 * Default minimum size of sample.
47 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
49 public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;
52 * Default sampling threshold: if the number of results is less than this number, no sampling will take place.
53 * @see SamplingParams#getSamplingThreshold()
55 public static final int DEFAULT_SAMPLING_THRESHOLD = 75000;
57 private int maxSampleSize = DEFAULT_MAX_SAMPLE_SIZE;
58 private int minSampleSize = DEFAULT_MIN_SAMPLE_SIZE;
59 private double sampleRatio = DEFAULT_SAMPLE_RATIO;
60 private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
61 private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
64 * Return the maxSampleSize.
65 * In no case should the resulting sample size exceed this value.
66 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
68 public final int getMaxSampleSize() {
73 * Return the minSampleSize.
74 * In no case should the resulting sample size be smaller than this value.
75 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
77 public final int getMinSampleSize() {
82 * @return the sampleRatio
83 * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
85 public final double getSampleRatio() {
90 * Return the samplingThreshold.
91 * Sampling would be performed only for document sets larger than this.
93 public final int getSamplingThreshold() {
94 return samplingThreshold;
98 * @param maxSampleSize
99 * the maxSampleSize to set
100 * @see #getMaxSampleSize()
102 public void setMaxSampleSize(int maxSampleSize) {
103 this.maxSampleSize = maxSampleSize;
107 * @param minSampleSize
108 * the minSampleSize to set
109 * @see #getMinSampleSize()
111 public void setMinSampleSize(int minSampleSize) {
112 this.minSampleSize = minSampleSize;
117 * the sampleRatio to set
118 * @see #getSampleRatio()
120 public void setSampleRatio(double sampleRatio) {
121 this.sampleRatio = sampleRatio;
125 * Set the sampling threshold. Note that this method's name is spelled <code>setSampingThreshold</code> (without the 'l').
126 * @see #getSamplingThreshold()
128 public void setSampingThreshold(int sampingThreshold) {
129 this.samplingThreshold = sampingThreshold;
133 * Check validity of sampling settings, making sure that
135 * <li> <code>minSampleSize &lt;= maxSampleSize &lt;= samplingThreshold </code></li>
136 * <li> <code>0 &lt; sampleRatio &lt;= 1 </code></li>
139 * @return true if valid, false otherwise
141 public boolean validate() {
143 samplingThreshold >= maxSampleSize &&
144 maxSampleSize >= minSampleSize &&
150 * Return the oversampleFactor. When sampling, we would collect that many more
151 * results, so that later, when selecting the top out of these, chances are higher
152 * to get actual best results. Note that having this value larger than 1 only
153 * makes sense when using a SampleFixer which finds accurate results, such as
154 * <code>TakmiSampleFixer</code>. When this value is smaller than 1, it is
155 * ignored and no oversampling takes place.
157 public final double getOversampleFactor() {
158 return oversampleFactor;
162 * @param oversampleFactor the oversampleFactor to set
163 * @see #getOversampleFactor()
165 public void setOversampleFactor(double oversampleFactor) {
166 this.oversampleFactor = oversampleFactor;