1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 /** Expert: Scoring functionality for phrase queries.
23 * <br>A document is considered matching if it contains the phrase-query terms
24 * at "valid" positions. What "valid positions" are
25 * depends on the type of the phrase query: for an exact phrase query terms are required
26 * to appear in adjacent locations, while for a sloppy phrase query some distance between
27 * the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
28 * is invoked for each document containing all the phrase query terms, in order to
29 * compute the frequency of the phrase query in that document. A non-zero frequency
32 abstract class PhraseScorer extends Scorer {
// Norm bytes looked up by document number in score(); when null, score() returns the raw score unnormalized.
33 protected byte[] norms;
// Value obtained from weight.getValue() in the constructor; multiplied into every raw score.
34 protected float value;
// Neighbouring entries of the cyclic PhrasePositions list (max.next == min). advanceMin() skips min
// forward and then rotates both references one step; max.doc is the document score() normalizes for.
36 PhrasePositions min, max;
38 private float freq; // phrase frequency in the current doc, as computed by phraseFreq().
/**
 * Builds a scorer over the postings of the phrase terms.
 * <p>Passes {@code similarity} and {@code weight} to the {@link Scorer} constructor, caches
 * {@code weight.getValue()} in {@link #value}, and creates one {@link PhrasePositions} per entry of
 * {@code postings}. The list is finally closed into a cycle via {@code max.next = min}.
 * {@link #min} is only assigned when {@code postings} is non-empty.
 *
 * @param weight the weight passed to the superclass; also the source of {@link #value}
 * @param postings one entry per phrase term; entry {@code i} yields
 *        {@code new PhrasePositions(postings[i].postings, postings[i].position, i)}
 * @param similarity the similarity passed to the superclass
 * @param norms norm bytes; NOTE(review): the assignment to the {@link #norms} field is not
 *        visible in this part of the file - confirm it is stored
 */
40 PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
41 Similarity similarity, byte[] norms) {
42 super(similarity, weight);
44 this.value = weight.getValue();
46 // Convert the term postings (tps) into a list of phrase positions.
47 // Note: a phrase position differs from a term position in that its position
48 // reflects the phrase offset: pp.pos = tp.pos - offset.
49 // This makes it easy to identify a matching (exact) phrase:
50 // it is found when all PhrasePositions have exactly the same position.
51 if (postings.length > 0) {
52 min = new PhrasePositions(postings[0].postings, postings[0].position, 0);
55 for (int i = 1; i < postings.length; i++) {
56 PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
61 max.next = min; // make the list cyclic for easier manipulation
/**
 * Moves this scorer forward by delegating to {@link #advance(int)}, using {@code max.doc} as the
 * target (presumably the scorer's current document - confirm against docID()).
 *
 * @return the value returned by {@link #advance(int)} for that target
 * @throws IOException if {@link #advance(int)} throws it
 */
71 public int nextDoc() throws IOException {
72 return advance(max.doc);
/**
 * Calls {@code min.skipTo(target)} and, when that succeeds, rotates {@link #min} and {@link #max}
 * one step around the cyclic list.
 * <p>When the skip fails, {@code max.doc} is set to {@code NO_MORE_DOCS}.
 * NOTE(review): the return statements are not visible in this part of the file; presumably
 * {@code false} is returned on a failed skip and {@code true} otherwise, since callers test
 * {@code !advanceMin(...)} - confirm.
 *
 * @param target the document passed to {@code min.skipTo}
 * @throws IOException declared for the underlying skip
 */
75 private boolean advanceMin(int target) throws IOException {
76 if (!min.skipTo(target)) {
77 max.doc = NO_MORE_DOCS; // for further calls to docID()
80 min = min.next; // cyclic
81 max = max.next; // cyclic
/**
 * Computes the score from the phrase frequency stored in {@link #freq}.
 * <p>The raw score is {@code getSimilarity().tf(freq) * value}. When {@link #norms} is non-null the
 * raw score is multiplied by the decoded norm of document {@code max.doc}; otherwise the raw score
 * is returned as is.
 *
 * @return the raw or norm-adjusted score, as described above
 */
86 public float score() throws IOException {
87 //System.out.println("scoring " + max.doc);
88 float raw = getSimilarity().tf(freq) * value; // raw score
89 return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[max.doc]); // normalize
/**
 * Advances towards {@code target} and then looks for a document with a non-zero phrase frequency.
 * <p>Steps visible here: {@link #advanceMin(int)} is first called with {@code target}. Then, while
 * {@link #freq} is zero, {@link #min} is advanced to {@code max.doc} for as long as
 * {@code min.doc < max.doc} or {@code restart} is set; once that inner loop ends,
 * {@link #phraseFreq()} is evaluated and stored in {@link #freq}.
 * NOTE(review): the branches taken when {@link #advanceMin(int)} fails, the updates to
 * {@code restart} and the return statement are not visible in this part of the file - confirm
 * them before relying on this description.
 *
 * @param target the document first passed to {@link #advanceMin(int)}
 * @throws IOException declared for the underlying postings access
 */
93 public int advance(int target) throws IOException {
95 if (!advanceMin(target)) {
98 boolean restart=false;
99 while (freq == 0.0f) {
100 while (min.doc < max.doc || restart) {
102 if (!advanceMin(max.doc)) {
106 // found a doc with all of the terms
107 freq = phraseFreq(); // check for phrase
116 * The phrase frequency in the current doc, as computed by {@link #phraseFreq()}.
119 public final float freq() {
124 * For a document containing all the phrase query terms, compute the
125 * frequency of the phrase in that document.
126 * A non-zero frequency means a match.
127 * <br>Note that containing all the phrase terms does not guarantee a match: the terms must also be found at matching positions.
128 * @return frequency of the phrase in current doc, 0 if not found.
// Implementation hook for subclasses: advance() stores the returned value in freq and keeps
// searching for as long as that value is 0.
130 abstract float phraseFreq() throws IOException;
133 public String toString() { return "scorer(" + weight + ")"; }