lucene-java-3.5.0/lucene/src/java/org/apache/lucene/search/PhraseScorer.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21
  22 /** Expert: Scoring functionality for phrase queries.
  23  * <br>A document is considered matching if it contains the phrase-query terms
  24  * at "valid" positions. What "valid positions" are
  25  * depends on the type of the phrase query: for an exact phrase query terms are required
  26  * to appear in adjacent locations, while for a sloppy phrase query some distance between
  27  * the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
  28  * is invoked for each document containing all the phrase query terms, in order to
  29  * compute the frequency of the phrase query in that document. A non zero frequency
  30  * means a match.
  31  */
  32 abstract class PhraseScorer extends Scorer {
  33   protected byte[] norms;
  34   protected float value;
  35
  36   PhrasePositions min, max;
  37
  38   private float freq; //phrase frequency in current doc as computed by phraseFreq().
  39
  40   PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
  41       Similarity similarity, byte[] norms) {
  42     super(similarity, weight);
  43     this.norms = norms;
  44     this.value = weight.getValue();
  45
  46     // convert tps to a list of phrase positions.
  47     // note: phrase-position differs from term-position in that its position
  48     // reflects the phrase offset: pp.pos = tp.pos - offset.
  49     // this allows to easily identify a matching (exact) phrase
  50     // when all PhrasePositions have exactly the same position.
  51     if (postings.length > 0) {
  52       min = new PhrasePositions(postings[0].postings, postings[0].position, 0);
  53       max = min;
  54       max.doc = -1;
  55       for (int i = 1; i < postings.length; i++) {
  56         PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
  57         max.next = pp;
  58         max = pp;
  59         max.doc = -1;
  60       }
  61       max.next = min; // make it cyclic for easier manipulation
  62     }
  63   }
  64
  65   @Override
  66   public int docID() {
  67     return max.doc;
  68   }
  69
  70   @Override
  71   public int nextDoc() throws IOException {
  72     return advance(max.doc);
  73   }
  74
  75   private boolean advanceMin(int target) throws IOException {
  76     if (!min.skipTo(target)) {
  77       max.doc = NO_MORE_DOCS; // for further calls to docID()
  78       return false;
  79     }
  80     min = min.next; // cyclic
  81     max = max.next; // cyclic
  82     return true;
  83   }
  84
  85   @Override
  86   public float score() throws IOException {
  87     //System.out.println("scoring " + max.doc);
  88     float raw = getSimilarity().tf(freq) * value; // raw score
  89     return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[max.doc]); // normalize
  90   }
  91
  92   @Override
  93   public int advance(int target) throws IOException {
  94     freq = 0.0f;
  95     if (!advanceMin(target)) {
  96       return NO_MORE_DOCS;
  97     }
  98     boolean restart=false;
  99     while (freq == 0.0f) {
 100       while (min.doc < max.doc || restart) {
 101         restart = false;
 102         if (!advanceMin(max.doc)) {
 103           return NO_MORE_DOCS;
 104         }
 105       }
 106       // found a doc with all of the terms
 107       freq = phraseFreq(); // check for phrase
 108       restart = true;
 109     }
 110
 111     // found a match
 112     return max.doc;
 113   }
 114
 115   /**
 116    * phrase frequency in current doc as computed by phraseFreq().
 117    */
 118   @Override
 119   public final float freq() {
 120     return freq;
 121   }
 122
 123   /**
 124    * For a document containing all the phrase query terms, compute the
 125    * frequency of the phrase in that document.
 126    * A non zero frequency means a match.
 127    * <br>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
 128    * @return frequency of the phrase in current doc, 0 if not found.
 129    */
 130   abstract float phraseFreq() throws IOException;
 131
 132   @Override
 133   public String toString() { return "scorer(" + weight + ")"; }
 134
 135 }