X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java new file mode 100644 index 0000000..e8a92a2 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -0,0 +1,407 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.*; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultipleTermPositions; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermPositions; +import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ToStringUtils; + +/** + * MultiPhraseQuery is a generalized version of PhraseQuery, with an added + * method {@link #add(Term[])}. 
+ * To use this class, to search for the phrase "Microsoft app*" first use + * add(Term) on the term "Microsoft", then find all terms that have "app" as + * prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] + * terms) to add them to the query. + * + * @version 1.0 + */ +public class MultiPhraseQuery extends Query { + private String field; + private ArrayList termArrays = new ArrayList(); + private ArrayList positions = new ArrayList(); + + private int slop = 0; + + /** Sets the phrase slop for this query. + * @see PhraseQuery#setSlop(int) + */ + public void setSlop(int s) { slop = s; } + + /** Returns the phrase slop for this query. + * @see PhraseQuery#getSlop() + */ + public int getSlop() { return slop; } + + /** Add a single term at the next position in the phrase. + * @see PhraseQuery#add(Term) + */ + public void add(Term term) { add(new Term[]{term}); } + + /** Add multiple terms at the next position in the phrase. Any of the terms + * may match. The position used is one past the most recently added + * position, or 0 if no position has been added yet. + * + * @see PhraseQuery#add(Term) + */ + public void add(Term[] terms) { + int position = 0; + if (positions.size() > 0) + position = positions.get(positions.size()-1).intValue() + 1; + + add(terms, position); + } + + /** + * Adds multiple terms at the given relative position within the phrase. + * While no term arrays have been added yet, the query's field is taken from + * terms[0]. Every term's field is then compared to the query's field by + * reference (!=), not with equals(). + * + * @see PhraseQuery#add(Term, int) + * @param terms the terms to add at this position + * @param position the relative position of these terms within the phrase + * @throws IllegalArgumentException if any term's field is not the same + * String instance as the query's field + */ + public void add(Term[] terms, int position) { + if (termArrays.size() == 0) + field = terms[0].field(); + + for (int i = 0; i < terms.length; i++) { + if (terms[i].field() != field) { + throw new IllegalArgumentException( + "All phrase terms must be in the same field (" + field + "): " + + terms[i]); + } + } + + termArrays.add(terms); + positions.add(Integer.valueOf(position)); + } + + /** + * Returns a List of the terms in the multiphrase. + * Do not modify the List or its contents. 
+ */ + public List getTermArrays() { + return Collections.unmodifiableList(termArrays); + } + + /** + * Returns the relative positions of terms in this phrase, copied into a + * newly allocated array with one entry per stored position, in the order + * the positions were added. + */ + public int[] getPositions() { + int[] result = new int[positions.size()]; + for (int i = 0; i < positions.size(); i++) + result[i] = positions.get(i).intValue(); + return result; + } + + // inherit javadoc + @Override + public void extractTerms(Set terms) { + for (final Term[] arr : termArrays) { + for (final Term term: arr) { + terms.add(term); + } + } + } + + + private class MultiPhraseWeight extends Weight { + private Similarity similarity; + private float value; + private final IDFExplanation idfExp; + private float idf; + private float queryNorm; + private float queryWeight; + + public MultiPhraseWeight(Searcher searcher) + throws IOException { + this.similarity = getSimilarity(searcher); + + // compute idf + ArrayList allTerms = new ArrayList(); + for(final Term[] terms: termArrays) { + for (Term term: terms) { + allTerms.add(term); + } + } + idfExp = similarity.idfExplain(allTerms, searcher); + idf = idfExp.getIdf(); + } + + @Override + public Query getQuery() { return MultiPhraseQuery.this; } + + @Override + public float getValue() { return value; } + + @Override + public float sumOfSquaredWeights() { + queryWeight = idf * getBoost(); // compute query weight + return queryWeight * queryWeight; // square it + } + + @Override + public void normalize(float queryNorm) { + this.queryNorm = queryNorm; + queryWeight *= queryNorm; // normalize query weight + value = queryWeight * idf; // idf for document + } + + @Override + public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + if (termArrays.size() == 0) // optimize zero-term case + return null; + + PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; + + for (int pos=0; pos 1) { + p = new MultipleTermPositions(reader, terms); + + // coarse -- this 
+ overcounts since a given doc can + // have more than one terms: + docFreq = 0; + for(int termIdx=0;termIdx i = termArrays.iterator(); + while (i.hasNext()) { + Term[] terms = i.next(); + if (terms.length > 1) { + buffer.append("("); + for (int j = 0; j < terms.length; j++) { + buffer.append(terms[j].text()); + if (j < terms.length-1) + buffer.append(" "); + } + buffer.append(")"); + } else { + buffer.append(terms[0].text()); + } + if (i.hasNext()) + buffer.append(" "); + } + buffer.append("\""); + + if (slop != 0) { + buffer.append("~"); + buffer.append(slop); + } + + buffer.append(ToStringUtils.boost(getBoost())); + + return buffer.toString(); + } + + + /** Returns true if o is a MultiPhraseQuery with the same boost, slop, term arrays and positions as this query. */ + @Override + public boolean equals(Object o) { + if (!(o instanceof MultiPhraseQuery)) return false; + MultiPhraseQuery other = (MultiPhraseQuery)o; + return this.getBoost() == other.getBoost() + && this.slop == other.slop + && termArraysEquals(this.termArrays, other.termArrays) + && this.positions.equals(other.positions); + } + + /** Returns a hash code combining the boost, slop, term arrays and positions of this query. */ + @Override + public int hashCode() { + return Float.floatToIntBits(getBoost()) + ^ slop + ^ termArraysHashCode() + ^ positions.hashCode() + ^ 0x4AC65113; + } + + // Breakout calculation of the termArrays hashcode + private int termArraysHashCode() { + int hashCode = 1; + for (final Term[] termArray: termArrays) { + hashCode = 31 * hashCode + + (termArray == null ? 0 : Arrays.hashCode(termArray)); + } + return hashCode; + } + + // Breakout calculation of the termArrays equals + private boolean termArraysEquals(List termArrays1, List termArrays2) { + if (termArrays1.size() != termArrays2.size()) { + return false; + } + ListIterator iterator1 = termArrays1.listIterator(); + ListIterator iterator2 = termArrays2.listIterator(); + while (iterator1.hasNext()) { + Term[] termArray1 = iterator1.next(); + Term[] termArray2 = iterator2.next(); + if (!(termArray1 == null ? 
termArray2 == null : Arrays.equals(termArray1, + termArray2))) { + return false; + } + } + return true; + } +}