pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / misc / src / java / org / apache / lucene / index / FieldNormModifier.java
diff --git a/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java

new file mode 100644 (file)

index 0000000..5805b42
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
@@ -0,0 +1,159 @@
+package org.apache.lucene.index;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import java.util.Date;
+
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * Given a directory and a list of fields, updates the fieldNorms in place for every document.
+ * 
+ * If Similarity class is specified, uses its computeNorm method to set norms.
+ * If -n command line argument is used, removed field norms, as if 
+ * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
+ *
+ * <p>
+ * NOTE: This will overwrite any length normalization or field/document boosts.
+ * </p>
+ *
+ */
+public class FieldNormModifier {
+
+  /**
+   * Command Line Execution method.
+   *
+   * <pre>
+   * Usage: FieldNormModifier /path/index &lt;package.SimilarityClassName | -n&gt; field1 field2 ...
+   * </pre>
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length < 3) {
+      System.err.println("Usage: FieldNormModifier <index> <package.SimilarityClassName | -n> <field1> [field2] ...");
+      System.exit(1);
+    }
+
+    Similarity s = null;
+    if (!args[1].equals("-n")) {
+      try {
+        s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
+      } catch (Exception e) {
+        System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
+        e.printStackTrace(System.err);
+        System.exit(1);
+      }
+    }
+
+    Directory d = FSDirectory.open(new File(args[0]));
+    FieldNormModifier fnm = new FieldNormModifier(d, s);
+
+    for (int i = 2; i < args.length; i++) {
+      System.out.print("Updating field: " + args[i] + " " + (new Date()).toString() + " ... ");
+      fnm.reSetNorms(args[i]);
+      System.out.println(new Date().toString());
+    }
+    
+    d.close();
+  }
+  
+  
+  private Directory dir;
+  private Similarity sim;
+  
+  /**
+   * Constructor for code that wishes to use this class programmatically
+   * If Similarity is null, kill the field norms.
+   *
+   * @param d the Directory to modify
+   * @param s the Similarity to use (can be null)
+   */
+  public FieldNormModifier(Directory d, Similarity s) {
+    dir = d;
+    sim = s;
+  }
+
+  /**
+   * Resets the norms for the specified field.
+   *
+   * <p>
+   * Opens a new IndexReader on the Directory given to this instance,
+   * modifies the norms (either using the Similarity given to this instance, or by using fake norms,
+   * and closes the IndexReader.
+   * </p>
+   *
+   * @param field the field whose norms should be reset
+   */
+  public void reSetNorms(String field) throws IOException {
+    String fieldName = StringHelper.intern(field);
+    int[] termCounts = new int[0];
+    
+    IndexReader reader = null;
+    TermEnum termEnum = null;
+    TermDocs termDocs = null;
+    try {
+      reader = IndexReader.open(dir, true);
+      termCounts = new int[reader.maxDoc()];
+      try {
+        termEnum = reader.terms(new Term(field));
+        try {
+          termDocs = reader.termDocs();
+          do {
+            Term term = termEnum.term();
+            if (term != null && term.field().equals(fieldName)) {
+              termDocs.seek(termEnum.term());
+              while (termDocs.next()) {
+                termCounts[termDocs.doc()] += termDocs.freq();
+              }
+            }
+          } while (termEnum.next());
+          
+        } finally {
+          if (null != termDocs) termDocs.close();
+        }
+      } finally {
+        if (null != termEnum) termEnum.close();
+      }
+    } finally {
+      if (null != reader) reader.close();
+    }
+    
+    try {
+      reader = IndexReader.open(dir, false); 
+      final FieldInvertState invertState = new FieldInvertState();
+      invertState.setBoost(1.0f);
+      for (int d = 0; d < termCounts.length; d++) {
+        if (! reader.isDeleted(d)) {
+          if (sim == null)
+            reader.setNorm(d, fieldName, Similarity.encodeNorm(1.0f));
+          else {
+            invertState.setLength(termCounts[d]);
+            reader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
+          }
+        }
+      }
+      
+    } finally {
+      if (null != reader) reader.close();
+    }
+  }
+  
+}