pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / src / java / org / apache / lucene / search / function / FieldScoreQuery.java
diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/function/FieldScoreQuery.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/function/FieldScoreQuery.java

new file mode 100755 (executable)

index 0000000..cd71c15
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/function/FieldScoreQuery.java
@@ -0,0 +1,125 @@
+package org.apache.lucene.search.function;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A query that scores each document as the value of the numeric input field.
+ * <p> 
+ * The query matches all documents, and scores each document according to the numeric 
+ * value of that field. 
+ * <p>
+ * It is assumed, and expected, that:
+ * <ul>
+ *  <li>The field used here is indexed, and has exactly 
+ *      one token in every scored document.</li> 
+ *  <li>Best if this field is un_tokenized.</li>
+ *  <li>That token is parseable to the selected type.</li>
+ * </ul>
+ * <p>  
+ * Combining this query in a FunctionQuery allows much freedom in affecting document scores.
+ * Note that with this freedom comes responsibility: it is more than likely that the
+ * default Lucene scoring is superior in quality to scoring modified as explained here.
+ * However, in some cases, and certainly for research experiments, this capability may prove useful.
+ * <p>
+ * When constructing this query, select the appropriate type. That type should match the data stored in the
+ * field, so in fact the "right" type should be selected before indexing. Type selection
+ * affects RAM usage:
+ * <ul>
+ *   <li>{@link Type#BYTE} consumes 1 * maxDocs bytes.</li>
+ *   <li>{@link Type#SHORT} consumes 2 * maxDocs bytes.</li>
+ *   <li>{@link Type#INT} consumes 4 * maxDocs bytes.</li>
+ *   <li>{@link Type#FLOAT} consumes 8 * maxDocs bytes.</li>
+ * </ul>
+ * <p>
+ * <b>Caching:</b>
+ * Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader.
+ * To take advantage of this, it is extremely important to reuse index readers or index searchers;
+ * otherwise - for instance, if a new index reader is opened for each query - a large penalty is
+ * paid for loading the field values into memory over and over again!
+ * 
+ * @lucene.experimental
+ */
+public class FieldScoreQuery extends ValueSourceQuery {
+
+  /**
+   * Type of score field, indicating how field values are interpreted/parsed.
+   * <p>
+   * The type selected at search time should match the data stored in the field.
+   * Different types have different RAM requirements:
+   * <ul>
+   *   <li>{@link #BYTE} consumes 1 * maxDocs bytes.</li>
+   *   <li>{@link #SHORT} consumes 2 * maxDocs bytes.</li>
+   *   <li>{@link #INT} consumes 4 * maxDocs bytes.</li>
+   *   <li>{@link #FLOAT} consumes 8 * maxDocs bytes.</li>
+   * </ul>
+   */
+  public static class Type {
+    /** field values are interpreted as numeric byte values. */
+    public static final Type BYTE = new Type("byte");
+
+    /** field values are interpreted as numeric short values. */
+    public static final Type SHORT = new Type("short");
+
+    /** field values are interpreted as numeric int values. */
+    public static final Type INT = new Type("int");
+
+    /** field values are interpreted as numeric float values. */
+    public static final Type FLOAT = new Type("float");
+
+    /** Short name printed by {@link #toString()}; final because the constants above are shared. */
+    private final String typeName;
+    private Type (String name) { // private: code outside this file cannot create further instances
+      this.typeName = name;
+    }
+    /** Returns the runtime class name followed by "::" and the short type name. */
+    @Override
+    public String toString() {
+      return getClass().getName()+"::"+typeName;
+    }
+  }
+  
+  /**
+   * Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
+   * <p>
+   * The <code>type</code> param tells how to parse the field string values into a numeric score value.
+   * @param field the numeric field to be used.
+   * @param type the type of the field: either
+   * {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}.
+   * @throws IllegalArgumentException if <code>type</code> is none of those constants.
+   */
+  public FieldScoreQuery(String field, Type type) {
+    super(getValueSource(field,type));
+  }
+
+  /**
+   * Picks the field value source matching the requested type. The type is compared by identity
+   * against the {@link Type} constants; any other value, including null, is rejected.
+   */
+  private static ValueSource getValueSource(String field, Type type) {
+    if (type == Type.BYTE) {
+      return new ByteFieldSource(field);
+    } else if (type == Type.SHORT) {
+      return new ShortFieldSource(field);
+    } else if (type == Type.INT) {
+      return new IntFieldSource(field);
+    } else if (type == Type.FLOAT) {
+      return new FloatFieldSource(field);
+    }
+    throw new IllegalArgumentException(type+" is not a known Field Score Query Type!");
+  }
+}