X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java new file mode 100644 index 0000000..96f896c --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -0,0 +1,286 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +/** + * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum + * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. + * This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be + * combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost, + * not the sum of the field scores (as BooleanQuery would give). + * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching + * another gets a higher score than "albino" matching both fields. + * To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in + * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. + * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that + * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms + * in the multiple fields. + */ +public class DisjunctionMaxQuery extends Query implements Iterable { + + /* The subqueries */ + private ArrayList disjuncts = new ArrayList(); + + /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */ + private float tieBreakerMultiplier = 0.0f; + + /** Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + * @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied by this weight + * and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that + * 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique + * word in the lower scored field (i.e., one that is not in any higher scored field. + */ + public DisjunctionMaxQuery(float tieBreakerMultiplier) { + this.tieBreakerMultiplier = tieBreakerMultiplier; + } + + /** + * Creates a new DisjunctionMaxQuery + * @param disjuncts a Collection of all the disjuncts to add + * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct + */ + public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) { + this.tieBreakerMultiplier = tieBreakerMultiplier; + add(disjuncts); + } + + /** Add a subquery to this disjunction + * @param query the disjunct added + */ + public void add(Query query) { + disjuncts.add(query); + } + + /** Add a collection of disjuncts to this disjunction + * via Iterable + */ + public void add(Collection disjuncts) { + this.disjuncts.addAll(disjuncts); + } + + /** An Iterator over the disjuncts */ + public Iterator iterator() { + return disjuncts.iterator(); + } + + /** + * Expert: the Weight for DisjunctionMaxQuery, used to + * normalize, score and explain these queries. + * + *

NOTE: this API and implementation is subject to + * change suddenly in the next release.

+ */ + protected class DisjunctionMaxWeight extends Weight { + /** The Similarity implementation. */ + protected Similarity similarity; + + /** The Weights for our subqueries, in 1-1 correspondence with disjuncts */ + protected ArrayList weights = new ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts + + /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ + public DisjunctionMaxWeight(Searcher searcher) throws IOException { + this.similarity = searcher.getSimilarity(); + for (Query disjunctQuery : disjuncts) { + weights.add(disjunctQuery.createWeight(searcher)); + } + } + + /** Return our associated DisjunctionMaxQuery */ + @Override + public Query getQuery() { return DisjunctionMaxQuery.this; } + + /** Return our boost */ + @Override + public float getValue() { return getBoost(); } + + /** Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ + @Override + public float sumOfSquaredWeights() throws IOException { + float max = 0.0f, sum = 0.0f; + for (Weight currentWeight : weights) { + float sub = currentWeight.sumOfSquaredWeights(); + sum += sub; + max = Math.max(max, sub); + + } + float boost = getBoost(); + return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * boost * boost; + } + + /** Apply the computed normalization factor to our subqueries */ + @Override + public void normalize(float norm) { + norm *= getBoost(); // Incorporate our boost + for (Weight wt : weights) { + wt.normalize(norm); + } + } + + /** Create the scorer used to score our associated DisjunctionMaxQuery */ + @Override + public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, + boolean topScorer) throws IOException { + Scorer[] scorers = new Scorer[weights.size()]; + int idx = 0; + for (Weight w : weights) { + Scorer subScorer = w.scorer(reader, true, false); + if (subScorer != null && subScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + scorers[idx++] = subScorer; + } + } + if (idx == 0) return null; // all scorers did not have documents + DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, similarity, scorers, idx); + return result; + } + + /** Explain the score we computed for doc */ + @Override + public Explanation explain(IndexReader reader, int doc) throws IOException { + if (disjuncts.size() == 1) return weights.get(0).explain(reader,doc); + ComplexExplanation result = new ComplexExplanation(); + float max = 0.0f, sum = 0.0f; + result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:"); + for (Weight wt : weights) { + Explanation e = wt.explain(reader, doc); + if (e.isMatch()) { + result.setMatch(Boolean.TRUE); + result.addDetail(e); + sum += e.getValue(); + max = Math.max(max, e.getValue()); + } + } + result.setValue(max + (sum - max) * tieBreakerMultiplier); + return result; + } + + } // end of DisjunctionMaxWeight inner class + + /** Create the Weight used to score us */ + @Override + public Weight createWeight(Searcher searcher) throws IOException { + return new DisjunctionMaxWeight(searcher); + } + + /** Optimize our representation and our subqueries representations + * @param reader the IndexReader we query + * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */ + @Override + public Query rewrite(IndexReader reader) throws IOException { + int numDisjunctions = disjuncts.size(); + if (numDisjunctions == 1) { + Query singleton = disjuncts.get(0); + Query result = singleton.rewrite(reader); + if (getBoost() != 1.0f) { + if (result == singleton) result = (Query)result.clone(); + result.setBoost(getBoost() * result.getBoost()); + } + return result; + } + DisjunctionMaxQuery clone = null; + for (int i = 0 ; i < numDisjunctions; i++) { + Query clause = disjuncts.get(i); + Query rewrite = clause.rewrite(reader); + if (rewrite != clause) { + if (clone == null) clone = (DisjunctionMaxQuery)this.clone(); + clone.disjuncts.set(i, rewrite); + } + } + if (clone != null) return clone; + else return this; + } + + /** Create a shallow copy of us -- used in rewriting if necessary + * @return a copy of us (but reuse, don't copy, our subqueries) */ + @Override @SuppressWarnings("unchecked") + public Object clone() { + DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone(); + clone.disjuncts = (ArrayList) this.disjuncts.clone(); + return clone; + } + + // inherit javadoc + @Override + public void extractTerms(Set terms) { + for (Query query : disjuncts) { + query.extractTerms(terms); + } + } + + /** Prettyprint us. + * @param field the field to which we are applied + * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + */ + @Override + public String toString(String field) { + StringBuilder buffer = new StringBuilder(); + buffer.append("("); + int numDisjunctions = disjuncts.size(); + for (int i = 0 ; i < numDisjunctions; i++) { + Query subquery = disjuncts.get(i); + if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens + buffer.append("("); + buffer.append(subquery.toString(field)); + buffer.append(")"); + } + else buffer.append(subquery.toString(field)); + if (i != numDisjunctions-1) buffer.append(" | "); + } + buffer.append(")"); + if (tieBreakerMultiplier != 0.0f) { + buffer.append("~"); + buffer.append(tieBreakerMultiplier); + } + if (getBoost() != 1.0) { + buffer.append("^"); + buffer.append(getBoost()); + } + return buffer.toString(); + } + + /** Return true iff we represent the same query as o + * @param o another object + * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + */ + @Override + public boolean equals(Object o) { + if (! (o instanceof DisjunctionMaxQuery) ) return false; + DisjunctionMaxQuery other = (DisjunctionMaxQuery)o; + return this.getBoost() == other.getBoost() + && this.tieBreakerMultiplier == other.tieBreakerMultiplier + && this.disjuncts.equals(other.disjuncts); + } + + /** Compute a hash code for hashing us + * @return the hash code + */ + @Override + public int hashCode() { + return Float.floatToIntBits(getBoost()) + + Float.floatToIntBits(tieBreakerMultiplier) + + disjuncts.hashCode(); + } + +}