1 package org.apache.lucene.search.highlight;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import java.util.HashMap;
21 import java.util.HashSet;
23 import org.apache.lucene.analysis.TokenStream;
24 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
25 import org.apache.lucene.index.IndexReader;
26 import org.apache.lucene.search.Query;
29 * {@link Scorer} implementation which scores text fragments by the number of
30 * unique query terms found. This class uses the {@link QueryTermExtractor}
31 * class to process determine the query terms and their boosts to be used.
33 // TODO: provide option to boost score of fragments near beginning of document
34 // based on fragment.getFragNum()
35 public class QueryTermScorer implements Scorer {
37 TextFragment currentTextFragment = null;
38 HashSet<String> uniqueTermsInFragment;
41 float maxTermWeight = 0;
42 private HashMap<String,WeightedTerm> termsToFind;
44 private CharTermAttribute termAtt;
48 * @param query a Lucene query (ideally rewritten using query.rewrite before
49 * being passed to this class and the searcher)
51 public QueryTermScorer(Query query) {
52 this(QueryTermExtractor.getTerms(query));
57 * @param query a Lucene query (ideally rewritten using query.rewrite before
58 * being passed to this class and the searcher)
59 * @param fieldName the Field name which is used to match Query terms
61 public QueryTermScorer(Query query, String fieldName) {
62 this(QueryTermExtractor.getTerms(query, false, fieldName));
67 * @param query a Lucene query (ideally rewritten using query.rewrite before
68 * being passed to this class and the searcher)
69 * @param reader used to compute IDF which can be used to a) score selected
70 * fragments better b) use graded highlights eg set font color
72 * @param fieldName the field on which Inverse Document Frequency (IDF)
73 * calculations are based
75 public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
76 this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
79 public QueryTermScorer(WeightedTerm[] weightedTerms) {
80 termsToFind = new HashMap<String,WeightedTerm>();
81 for (int i = 0; i < weightedTerms.length; i++) {
82 WeightedTerm existingTerm = termsToFind
83 .get(weightedTerms[i].term);
84 if ((existingTerm == null)
85 || (existingTerm.weight < weightedTerms[i].weight)) {
86 // if a term is defined more than once, always use the highest scoring
88 termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
89 maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
95 * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
97 public TokenStream init(TokenStream tokenStream) {
98 termAtt = tokenStream.addAttribute(CharTermAttribute.class);
106 * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
107 * .lucene.search.highlight.TextFragment)
109 public void startFragment(TextFragment newFragment) {
110 uniqueTermsInFragment = new HashSet<String>();
111 currentTextFragment = newFragment;
118 * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
120 public float getTokenScore() {
121 String termText = termAtt.toString();
123 WeightedTerm queryTerm = termsToFind.get(termText);
124 if (queryTerm == null) {
125 // not a query term - return
128 // found a query term - is it unique in this doc?
129 if (!uniqueTermsInFragment.contains(termText)) {
130 totalScore += queryTerm.getWeight();
131 uniqueTermsInFragment.add(termText);
133 return queryTerm.getWeight();
138 * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
140 public float getFragmentScore() {
148 * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
150 public void allFragmentsProcessed() {
151 // this class has no special operations to perform at end of processing
156 * @return The highest weighted term (useful for passing to GradientFormatter
157 * to set top end of coloring scale.
159 public float getMaxTermWeight() {
160 return maxTermWeight;