1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 /** Expert: Scoring functionality for phrase queries.
23 * <br>A document is considered matching if it contains the phrase-query terms
24 * at "valid" positions. What "valid positions" are
25 * depends on the type of the phrase query: for an exact phrase query terms are required
26 * to appear in adjacent locations, while for a sloppy phrase query some distance between
27 * the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
28 * is invoked for each document containing all the phrase query terms, in order to
29 * compute the frequency of the phrase query in that document. A non-zero frequency
32 abstract class PhraseScorer extends Scorer {
33 protected byte[] norms;
34 protected float value;
36 private boolean firstTime = true;
37 private boolean more = true;
38 protected PhraseQueue pq;
39 protected PhrasePositions first, last;
41 private float freq; //phrase frequency in current doc as computed by phraseFreq().
43 PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
44 Similarity similarity, byte[] norms) {
45 super(similarity, weight);
47 this.value = weight.getValue();
49 // convert the postings to a linked list of phrase positions.
50 // note: a phrase-position differs from a term-position in that its position
51 // reflects the phrase offset: pp.pos = tp.pos - offset.
52 // this makes it easy to identify a matching (exact) phrase:
53 // it occurs when all PhrasePositions have exactly the same position.
54 for (int i = 0; i < postings.length; i++) {
55 PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
56 if (last != null) { // add next to end of list
64 pq = new PhraseQueue(postings.length); // construct empty pq
// Reports the scorer's current document: the doc field of `first`, the head of the
// PhrasePositions list. nextDoc() and advance() each contain an assignment of
// NO_MORE_DOCS to first.doc; the conditions guarding it are not visible in this listing.
69 public int docID() { return first.doc; }
72 public int nextDoc() throws IOException {
77 more = last.next(); // trigger further scanning
80 first.doc = NO_MORE_DOCS;
85 // next without initial increment
86 private boolean doNext() throws IOException {
88 while (more && first.doc < last.doc) { // find doc w/ all the terms
89 more = first.skipTo(last.doc); // skip first upto last
90 firstToLast(); // and move it to the end
94 // found a doc with all of the terms
95 freq = phraseFreq(); // check for phrase
96 if (freq == 0.0f) // no match
97 more = last.next(); // trigger further scanning
99 return true; // found a match
102 return false; // no more matches
// Scores the current document from the phrase frequency cached in `freq`
// (doNext() assigns it from phraseFreq()).
106 public float score() throws IOException {
107 //System.out.println("scoring " + first.doc);
// Similarity.tf(freq) scaled by `value`, which the constructor sets from weight.getValue().
108 float raw = getSimilarity().tf(freq) * value; // raw score
// With no norms array the raw score is returned unchanged; otherwise it is multiplied
// by the decoded norm byte stored at index first.doc.
109 return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize
113 public int advance(int target) throws IOException {
115 for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
116 more = pp.skipTo(target);
122 first.doc = NO_MORE_DOCS;
128 * phrase frequency in current doc as computed by phraseFreq().
131 public final float freq() {
136 * For a document containing all the phrase query terms, compute the
137 * frequency of the phrase in that document.
138 * A non-zero frequency means a match.
139 * <br>Note that containing all the phrase terms does not guarantee a match: the terms must also be found in matching locations.
140 * @return frequency of the phrase in current doc, 0 if not found.
// Subclass hook. doNext() stores the returned value in `freq` and treats 0.0f as
// "no match" for the current candidate document.
142 protected abstract float phraseFreq() throws IOException;
144 private void init() throws IOException {
145 for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
153 private void sort() {
155 for (PhrasePositions pp = first; pp != null; pp = pp.next) {
161 protected final void pqToList() {
163 while (pq.top() != null) {
164 PhrasePositions pp = pq.pop();
165 if (last != null) { // add next to end of list
174 protected final void firstToLast() {
175 last.next = first; // move first to end of list
// Debug string of the form "scorer(" + weight + ")".
// NOTE(review): `weight` is not declared in the visible lines; presumably a field inherited
// from Scorer, which receives it via super(similarity, weight) in the constructor. Confirm.
182 public String toString() { return "scorer(" + weight + ")"; }