1 package org.apache.lucene.search;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.io.IOException;
21 import java.util.Arrays;
23 import org.apache.lucene.index.*;
25 final class ExactPhraseScorer extends Scorer {
26 private final byte[] norms;
27 private final float value;
29 private static final int SCORE_CACHE_SIZE = 32;
30 private final float[] scoreCache = new float[SCORE_CACHE_SIZE];
32 private final int endMinus1;
34 private final static int CHUNK = 4096;
37 private final int[] counts = new int[CHUNK];
38 private final int[] gens = new int[CHUNK];
42 private final static class ChunkState {
43 final TermPositions posEnum;
45 final boolean useAdvance;
51 public ChunkState(TermPositions posEnum, int offset, boolean useAdvance) {
52 this.posEnum = posEnum;
54 this.useAdvance = useAdvance;
58 private final ChunkState[] chunkStates;
60 private int docID = -1;
63 ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
64 Similarity similarity, byte[] norms) throws IOException {
65 super(similarity, weight);
67 this.value = weight.getValue();
69 chunkStates = new ChunkState[postings.length];
71 endMinus1 = postings.length-1;
73 for(int i=0;i<postings.length;i++) {
75 // Coarse optimization: advance(target) is fairly
76 // costly, so, if the relative freq of the 2nd
77 // rarest term is not that much (> 1/5th) rarer than
78 // the first term, then we just use .nextDoc() when
79 // ANDing. This buys ~15% gain for phrases where
80 // freq of rarest 2 terms is close:
81 final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
82 chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
83 if (i > 0 && !postings[i].postings.next()) {
89 for (int i = 0; i < SCORE_CACHE_SIZE; i++) {
90 scoreCache[i] = getSimilarity().tf((float) i) * value;
95 public int nextDoc() throws IOException {
98 // first (rarest) term
99 if (!chunkStates[0].posEnum.next()) {
100 docID = DocIdSetIterator.NO_MORE_DOCS;
104 final int doc = chunkStates[0].posEnum.doc();
108 while(i < chunkStates.length) {
109 final ChunkState cs = chunkStates[i];
110 int doc2 = cs.posEnum.doc();
113 if (!cs.posEnum.skipTo(doc)) {
114 docID = DocIdSetIterator.NO_MORE_DOCS;
117 doc2 = cs.posEnum.doc();
123 // safety net -- fallback to .skipTo if we've
124 // done too many .nextDocs
126 if (!cs.posEnum.skipTo(doc)) {
127 docID = DocIdSetIterator.NO_MORE_DOCS;
130 doc2 = cs.posEnum.doc();
134 if (cs.posEnum.next()) {
135 doc2 = cs.posEnum.doc();
137 docID = DocIdSetIterator.NO_MORE_DOCS;
149 if (i == chunkStates.length) {
150 // this doc has all the terms -- now test whether
163 public int advance(int target) throws IOException {
166 if (!chunkStates[0].posEnum.skipTo(target)) {
167 docID = DocIdSetIterator.NO_MORE_DOCS;
170 int doc = chunkStates[0].posEnum.doc();
176 while(i < chunkStates.length) {
177 int doc2 = chunkStates[i].posEnum.doc();
179 if (!chunkStates[i].posEnum.skipTo(doc)) {
180 docID = DocIdSetIterator.NO_MORE_DOCS;
183 doc2 = chunkStates[i].posEnum.doc();
192 if (i == chunkStates.length) {
193 // this doc has all the terms -- now test whether
202 if (!chunkStates[0].posEnum.next()) {
203 docID = DocIdSetIterator.NO_MORE_DOCS;
206 doc = chunkStates[0].posEnum.doc();
212 public String toString() {
213 return "ExactPhraseScorer(" + weight + ")";
217 public float freq() {
227 public float score() throws IOException {
228 final float raw; // raw score
229 if (freq < SCORE_CACHE_SIZE) {
230 raw = scoreCache[freq];
232 raw = getSimilarity().tf((float) freq) * value;
234 return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[docID]); // normalize
237 private int phraseFreq() throws IOException {
242 for(int i=0;i<chunkStates.length;i++) {
243 final ChunkState cs = chunkStates[i];
244 cs.posLimit = cs.posEnum.freq();
245 cs.pos = cs.offset + cs.posEnum.nextPosition();
251 int chunkEnd = CHUNK;
253 // process chunk by chunk
256 // TODO: we could fold in chunkStart into offset and
257 // save one subtract per pos incr
265 Arrays.fill(gens, 0);
271 final ChunkState cs = chunkStates[0];
272 while(cs.pos < chunkEnd) {
273 if (cs.pos > cs.lastPos) {
275 final int posIndex = cs.pos - chunkStart;
276 counts[posIndex] = 1;
277 assert gens[posIndex] != gen;
278 gens[posIndex] = gen;
281 if (cs.posUpto == cs.posLimit) {
286 cs.pos = cs.offset + cs.posEnum.nextPosition();
292 for(int t=1;t<endMinus1;t++) {
293 final ChunkState cs = chunkStates[t];
295 while(cs.pos < chunkEnd) {
296 if (cs.pos > cs.lastPos) {
298 final int posIndex = cs.pos - chunkStart;
299 if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
306 if (cs.posUpto == cs.posLimit) {
311 cs.pos = cs.offset + cs.posEnum.nextPosition();
320 // petered out for this chunk
329 final ChunkState cs = chunkStates[endMinus1];
330 while(cs.pos < chunkEnd) {
331 if (cs.pos > cs.lastPos) {
333 final int posIndex = cs.pos - chunkStart;
334 if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
339 if (cs.posUpto == cs.posLimit) {
344 cs.pos = cs.offset + cs.posEnum.nextPosition();