// Recovered from a whitespace-collapsed git blobdiff (deleted file):
//   lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
//   X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;
import java.util.ArrayList;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.ArrayUtil;

/** A Query that matches documents containing a particular sequence of terms.
 * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
 *
 * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
 */
public class PhraseQuery extends Query {
  /** Field shared by every term of the phrase; taken from the first term added. */
  private String field;
  /** Phrase terms, in the order they were added. */
  private ArrayList<Term> terms = new ArrayList<Term>(4);
  /** Relative position of each term; parallel to {@link #terms}. */
  private ArrayList<Integer> positions = new ArrayList<Integer>(4);
  /** Largest position passed to {@link #add(Term, int)} so far (never below zero). */
  private int maxPosition = 0;
  private int slop = 0;

  /** Constructs an empty phrase query. */
  public PhraseQuery() {}

  /** Sets the number of other words permitted between words in query phrase.
    If zero, then this is an exact phrase search.  For larger values this works
    like a <code>WITHIN</code> or <code>NEAR</code> operator.

    <p>The slop is in fact an edit-distance, where the units correspond to
    moves of terms in the query phrase out of position.  For example, to switch
    the order of two words requires two moves (the first move places the words
    atop one another), so to permit re-orderings of phrases, the slop must be
    at least two.

    <p>More exact matches are scored higher than sloppier matches, thus search
    results are sorted by exactness.

    <p>The slop is zero by default, requiring exact matches.*/
  public void setSlop(int s) { slop = s; }

  /** Returns the slop.  See setSlop(). */
  public int getSlop() { return slop; }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term is the one immediately after the last term added.
   *
   * @param term the term to append; its field must match the field of the terms already added
   */
  public void add(Term term) {
    int position = 0;
    if (positions.size() > 0)
      position = positions.get(positions.size() - 1).intValue() + 1;

    add(term, position);
  }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term within the phrase is specified explicitly.
   * This allows e.g. phrases with more than one term at the same position
   * or phrases with gaps (e.g. in connection with stopwords).
   *
   * @param term the term to add; its field must match the field of the terms already added
   * @param position the relative position of the term within the phrase
   * @throws IllegalArgumentException if the term's field differs from the field of the
   *         first term added
   */
  public void add(Term term, int position) {
    if (terms.size() == 0)
      field = term.field();
    // Compare field names by value so the check does not depend on both
    // strings being the same instance.
    else if (!term.field().equals(field))
      throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);

    terms.add(term);
    positions.add(Integer.valueOf(position));
    if (position > maxPosition) maxPosition = position;
  }

  /** Returns the set of terms in this phrase. */
  public Term[] getTerms() {
    return terms.toArray(new Term[0]);
  }

  /**
   * Returns the relative positions of terms in this phrase.
   */
  public int[] getPositions() {
    int[] result = new int[positions.size()];
    for (int i = 0; i < positions.size(); i++)
      result[i] = positions.get(i).intValue();
    return result;
  }

  /** Rewrites a single-term phrase into an equivalent {@link TermQuery} carrying the same boost. */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    if (terms.size() == 1) {
      TermQuery tq = new TermQuery(terms.get(0));
      tq.setBoost(getBoost());
      return tq;
    } else
      return super.rewrite(reader);
  }

  /**
   * Bundles the postings of one phrase term with its document frequency and its
   * position in the phrase. Orders by docFreq, then position, then term.
   * Note that {@code postings} takes no part in equals/hashCode/compareTo.
   */
  static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
    final TermPositions postings;
    final int docFreq;
    final int position;
    final Term term;

    public PostingsAndFreq(TermPositions postings, int docFreq, int position, Term term) {
      this.postings = postings;
      this.docFreq = docFreq;
      this.position = position;
      this.term = term;
    }

    public int compareTo(PostingsAndFreq other) {
      // Explicit comparisons rather than subtraction: positions are caller
      // supplied, so a difference could overflow and flip the sign.
      if (docFreq != other.docFreq) {
        return docFreq < other.docFreq ? -1 : 1;
      }
      if (position != other.position) {
        return position < other.position ? -1 : 1;
      }
      return term.compareTo(other.term);
    }

    @Override
    public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = prime * result + docFreq;
      result = prime * result + position;
      result = prime * result + ((term == null) ? 0 : term.hashCode());
      return result;
    }

    @Override
    public boolean equals(Object obj) {
      if (this == obj) return true;
      if (obj == null) return false;
      if (getClass() != obj.getClass()) return false;
      PostingsAndFreq other = (PostingsAndFreq) obj;
      if (docFreq != other.docFreq) return false;
      if (position != other.position) return false;
      if (term == null) {
        if (other.term != null) return false;
      } else if (!term.equals(other.term)) return false;
      return true;
    }
  }

  /** Weight for a phrase of zero or of two or more terms (see {@link #createWeight}). */
  private class PhraseWeight extends Weight {
    private final Similarity similarity;
    private float value;
    private float idf;
    private float queryNorm;
    private float queryWeight;
    private IDFExplanation idfExp;

    public PhraseWeight(Searcher searcher)
      throws IOException {
      this.similarity = getSimilarity(searcher);

      idfExp = similarity.idfExplain(terms, searcher);
      idf = idfExp.getIdf();
    }

    @Override
    public String toString() { return "weight(" + PhraseQuery.this + ")"; }

    @Override
    public Query getQuery() { return PhraseQuery.this; }

    @Override
    public float getValue() { return value; }

    @Override
    public float sumOfSquaredWeights() {
      queryWeight = idf * getBoost();             // compute query weight
      return queryWeight * queryWeight;           // square it
    }

    @Override
    public void normalize(float queryNorm) {
      this.queryNorm = queryNorm;
      queryWeight *= queryNorm;                   // normalize query weight
      value = queryWeight * idf;                  // idf for document
    }

    /**
     * Builds an exact or sloppy phrase scorer over the postings of every term.
     *
     * @return the scorer, or null when the phrase has no terms, a term has no
     *         postings, or the exact scorer reports that no document can match
     */
    @Override
    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
      if (terms.size() == 0)                      // optimize zero-term case
        return null;

      PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
      for (int i = 0; i < terms.size(); i++) {
        final Term t = terms.get(i);
        TermPositions p = reader.termPositions(t);
        if (p == null)
          return null;
        postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue(), t);
      }

      if (slop == 0) {                            // optimize exact case
        // sort by increasing docFreq order (only done for the exact scorer)
        ArrayUtil.mergeSort(postingsFreqs);
        ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity,
                                                    reader.norms(field));
        return s.noDocs ? null : s;
      }

      return new SloppyPhraseScorer(this, postingsFreqs, similarity, slop,
                                    reader.norms(field));
    }

    /**
     * Explains the score of {@code doc} as queryWeight (boost * idf * queryNorm)
     * times fieldWeight (tf * idf * fieldNorm).
     */
    @Override
    public Explanation explain(IndexReader reader, int doc)
      throws IOException {

      ComplexExplanation result = new ComplexExplanation();
      result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

      String docFreqs = idfExp.explain();

      // Quoted, space-separated phrase text used in the fieldWeight description.
      StringBuilder query = new StringBuilder();
      query.append('\"');
      for (int i = 0; i < terms.size(); i++) {
        if (i != 0) {
          query.append(" ");
        }
        query.append(terms.get(i).text());
      }
      query.append('\"');

      Explanation idfExpl =
        new Explanation(idf, "idf(" + field + ":" + docFreqs + ")");

      // explain query weight
      Explanation queryExpl = new Explanation();
      queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

      // The boost always enters the product below, but is only listed as a
      // detail when it differs from the neutral value 1.0.
      Explanation boostExpl = new Explanation(getBoost(), "boost");
      if (getBoost() != 1.0f)
        queryExpl.addDetail(boostExpl);
      queryExpl.addDetail(idfExpl);

      Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
      queryExpl.addDetail(queryNormExpl);

      queryExpl.setValue(boostExpl.getValue() *
                         idfExpl.getValue() *
                         queryNormExpl.getValue());

      result.addDetail(queryExpl);

      // explain field weight
      Explanation fieldExpl = new Explanation();
      fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
                               "), product of:");

      Scorer scorer = scorer(reader, true, false);
      if (scorer == null) {
        return new Explanation(0.0f, "no matching docs");
      }
      Explanation tfExplanation = new Explanation();
      int d = scorer.advance(doc);
      // A phrase frequency is only available when the scorer landed exactly on doc.
      float phraseFreq = (d == doc) ? scorer.freq() : 0.0f;

      tfExplanation.setValue(similarity.tf(phraseFreq));
      tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");

      fieldExpl.addDetail(tfExplanation);
      fieldExpl.addDetail(idfExpl);

      Explanation fieldNormExpl = new Explanation();
      byte[] fieldNorms = reader.norms(field);
      float fieldNorm =
        fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f;
      fieldNormExpl.setValue(fieldNorm);
      fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
      fieldExpl.addDetail(fieldNormExpl);

      fieldExpl.setValue(tfExplanation.getValue() *
                         idfExpl.getValue() *
                         fieldNormExpl.getValue());

      result.addDetail(fieldExpl);

      // combine them
      result.setValue(queryExpl.getValue() * fieldExpl.getValue());
      result.setMatch(tfExplanation.isMatch());
      return result;
    }
  }

  /** Creates the weight for this query; a single-term phrase delegates to a {@link TermQuery}. */
  @Override
  public Weight createWeight(Searcher searcher) throws IOException {
    if (terms.size() == 1) {                      // optimize one-term case
      Term term = terms.get(0);
      Query termQuery = new TermQuery(term);
      termQuery.setBoost(getBoost());
      return termQuery.createWeight(searcher);
    }
    return new PhraseWeight(searcher);
  }

  /**
   * @see org.apache.lucene.search.Query#extractTerms(Set)
   */
  @Override
  public void extractTerms(Set<Term> queryTerms) {
    queryTerms.addAll(terms);
  }

  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String f) {
    StringBuilder buffer = new StringBuilder();
    if (field != null && !field.equals(f)) {
      buffer.append(field);
      buffer.append(":");
    }

    buffer.append("\"");
    // One slot per position: terms sharing a position are joined with '|',
    // positions without any term are printed as '?'.
    String[] pieces = new String[maxPosition + 1];
    for (int i = 0; i < terms.size(); i++) {
      int pos = positions.get(i).intValue();
      String s = pieces[pos];
      if (s == null) {
        s = terms.get(i).text();
      } else {
        s = s + "|" + terms.get(i).text();
      }
      pieces[pos] = s;
    }
    for (int i = 0; i < pieces.length; i++) {
      if (i > 0) {
        buffer.append(' ');
      }
      String s = pieces[i];
      if (s == null) {
        buffer.append('?');
      } else {
        buffer.append(s);
      }
    }
    buffer.append("\"");

    if (slop != 0) {
      buffer.append("~");
      buffer.append(slop);
    }

    buffer.append(ToStringUtils.boost(getBoost()));

    return buffer.toString();
  }

  /** Returns true iff <code>o</code> is equal to this. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof PhraseQuery))
      return false;
    PhraseQuery other = (PhraseQuery)o;
    return (this.getBoost() == other.getBoost())
      && (this.slop == other.slop)
      &&  this.terms.equals(other.terms)
      && this.positions.equals(other.positions);
  }

  /** Returns a hash code value for this object.*/
  @Override
  public int hashCode() {
    return Float.floatToIntBits(getBoost())
      ^ slop
      ^ terms.hashCode()
      ^ positions.hashCode();
  }

}