// Recovered from a git blobdiff (new file, mode 100644, index 0000000..5805b42, hunk @@ -0,0 +1,159 @@):
// X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
// Path: lucene-java-3.5.0/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java
package org.apache.lucene.index;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.File;
import java.util.Date;

import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.StringHelper;

/**
 * Given a directory and a list of fields, updates the fieldNorms in place for every document.
 *
 * If a Similarity class is specified, uses its computeNorm method to set norms.
 * If the -n command line argument is used, removes field norms, as if
 * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
 *
 * <p>
 * NOTE: This will overwrite any length normalization or field/document boosts.
 * </p>
 *
 */
public class FieldNormModifier {

  /**
   * Command Line Execution method.
   *
   * <pre>
   * Usage: FieldNormModifier /path/index &lt;package.SimilarityClassName | -n&gt; field1 field2 ...
   * </pre>
   *
   * @param args index path, then a Similarity class name (or <code>-n</code>), then one or
   *             more field names
   * @throws IOException if opening the directory or rewriting the norms fails
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 3) {
      System.err.println("Usage: FieldNormModifier <index> <package.SimilarityClassName | -n> <field1> [field2] ...");
      System.exit(1);
    }

    // "-n" means "no Similarity": norms are reset to the encoded value of 1.0f.
    Similarity s = null;
    if (!args[1].equals("-n")) {
      try {
        s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
      } catch (Exception e) {
        System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
        e.printStackTrace(System.err);
        System.exit(1);
      }
    }

    Directory d = FSDirectory.open(new File(args[0]));
    try {
      FieldNormModifier fnm = new FieldNormModifier(d, s);

      for (int i = 2; i < args.length; i++) {
        System.out.print("Updating field: " + args[i] + " " + (new Date()).toString() + " ... ");
        fnm.reSetNorms(args[i]);
        System.out.println(new Date().toString());
      }
    } finally {
      // Close the directory even when one of the fields fails to update.
      d.close();
    }
  }


  private final Directory dir;
  private final Similarity sim;

  /**
   * Constructor for code that wishes to use this class programmatically.
   * If Similarity is null, kill the field norms.
   *
   * @param d the Directory to modify
   * @param s the Similarity to use (can be null)
   */
  public FieldNormModifier(Directory d, Similarity s) {
    dir = d;
    sim = s;
  }

  /**
   * Resets the norms for the specified field.
   *
   * <p>
   * Opens a new IndexReader on the Directory given to this instance,
   * modifies the norms (either using the Similarity given to this instance, or by using fake norms),
   * and closes the IndexReader.
   * </p>
   *
   * @param field the field whose norms should be reset
   * @throws IOException if the index cannot be read or the norms cannot be written
   */
  public void reSetNorms(String field) throws IOException {
    String fieldName = StringHelper.intern(field);
    int[] termCounts = countTermsPerDocument(fieldName);
    writeNorms(fieldName, termCounts);
  }

  /**
   * Sums, for every document, the frequencies of all terms of the given field.
   * Uses its own read-only reader, which is closed before returning.
   *
   * @param fieldName the (interned) field name to count terms for
   * @return an array indexed by document number, sized to the reader's maxDoc()
   */
  private int[] countTermsPerDocument(String fieldName) throws IOException {
    IndexReader reader = IndexReader.open(dir, true);
    try {
      int[] termCounts = new int[reader.maxDoc()];
      TermEnum termEnum = reader.terms(new Term(fieldName));
      try {
        TermDocs termDocs = reader.termDocs();
        try {
          do {
            Term term = termEnum.term();
            if (term == null) {
              continue; // nothing at this position; let next() decide whether to go on
            }
            if (!term.field().equals(fieldName)) {
              // The enumeration is ordered by field, then text, so once another
              // field shows up no further terms of fieldName can follow.
              break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
              termCounts[termDocs.doc()] += termDocs.freq();
            }
          } while (termEnum.next());
        } finally {
          termDocs.close();
        }
      } finally {
        termEnum.close();
      }
      return termCounts;
    } finally {
      reader.close();
    }
  }

  /**
   * Sets the norm of the given field on every non-deleted document, using a
   * separate writable reader that is closed afterwards.
   *
   * @param fieldName  the (interned) field name whose norms are rewritten
   * @param termCounts per-document field length, indexed by document number
   */
  private void writeNorms(String fieldName, int[] termCounts) throws IOException {
    IndexReader reader = IndexReader.open(dir, false);
    try {
      // Norm used when no Similarity was supplied; the same for every document.
      final byte fakeNorm = Similarity.encodeNorm(1.0f);
      final FieldInvertState invertState = new FieldInvertState();
      invertState.setBoost(1.0f);
      for (int d = 0; d < termCounts.length; d++) {
        if (reader.isDeleted(d)) {
          continue;
        }
        if (sim == null) {
          reader.setNorm(d, fieldName, fakeNorm);
        } else {
          invertState.setLength(termCounts[d]);
          reader.setNorm(d, fieldName, sim.encodeNormValue(sim.computeNorm(fieldName, invertState)));
        }
      }
    } finally {
      reader.close();
    }
  }

}