+++ /dev/null
-package org.apache.lucene.search.highlight;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.HashMap;
-import java.util.HashSet;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Query;
-
-/**
- * {@link Scorer} implementation which scores text fragments by the number of
- * unique query terms found. This class uses the {@link QueryTermExtractor}
- * class to determine the query terms and their boosts to be used.
- */
-// TODO: provide option to boost score of fragments near beginning of document
-// based on fragment.getFragNum()
-public class QueryTermScorer implements Scorer {
-
-  /** Fragment most recently handed to {@link #startFragment}; only stored, never read in this class. */
-  TextFragment currentTextFragment;
-
-  /** Query terms already counted towards the score of the current fragment. */
-  HashSet<String> uniqueTermsInFragment;
-
-  /** Running score of the current fragment; reset by {@link #startFragment}. */
-  float totalScore;
-
-  /** Largest weight among the terms accepted by the constructor. */
-  float maxTermWeight;
-
-  /** Term text mapped to the weighted term that tokens are matched against. */
-  private HashMap<String,WeightedTerm> termsToFind;
-
-  /** Attribute exposing the text of the current token; assigned in {@link #init}. */
-  private CharTermAttribute termAtt;
-
-  /**
-   * Builds a scorer from the terms that {@link QueryTermExtractor#getTerms}
-   * returns for the query.
-   *
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
-   */
-  public QueryTermScorer(Query query) {
-    this(QueryTermExtractor.getTerms(query));
-  }
-
-  /**
-   * Builds a scorer from the terms that {@link QueryTermExtractor} returns for
-   * the query and the given field name.
-   *
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
-   * @param fieldName the Field name which is used to match Query terms
-   */
-  public QueryTermScorer(Query query, String fieldName) {
-    this(QueryTermExtractor.getTerms(query, false, fieldName));
-  }
-
-  /**
-   * Builds a scorer from the IDF-weighted terms that
-   * {@link QueryTermExtractor#getIdfWeightedTerms} returns.
-   *
-   * @param query a Lucene query (ideally rewritten using query.rewrite before
-   *        being passed to this class and the searcher)
-   * @param reader used to compute IDF which can be used to a) score selected
-   *        fragments better b) use graded highlights eg set font color
-   *        intensity
-   * @param fieldName the field on which Inverse Document Frequency (IDF)
-   *        calculations are based
-   */
-  public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
-    this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
-  }
-
-  /**
-   * Builds a scorer from an explicit list of weighted terms. When the same
-   * term text occurs more than once, the entry with the highest weight wins.
-   *
-   * @param weightedTerms the terms (and weights) to look for in the token stream
-   */
-  public QueryTermScorer(WeightedTerm[] weightedTerms) {
-    termsToFind = new HashMap<String,WeightedTerm>();
-    for (WeightedTerm candidate : weightedTerms) {
-      WeightedTerm known = termsToFind.get(candidate.term);
-      // Skip the candidate unless it is new or strictly outweighs the entry
-      // already registered for the same term text.
-      if (known != null && !(known.weight < candidate.weight)) {
-        continue;
-      }
-      termsToFind.put(candidate.term, candidate);
-      maxTermWeight = Math.max(maxTermWeight, candidate.getWeight());
-    }
-  }
-
-  /**
-   * Registers the {@link CharTermAttribute} of the stream so that
-   * {@link #getTokenScore()} can read the text of the current token.
-   *
-   * @param tokenStream the stream whose tokens are about to be scored
-   * @return always <code>null</code> (presumably tells the caller to keep using
-   *         the stream it passed in; confirm against {@link Scorer})
-   */
-  public TokenStream init(TokenStream tokenStream) {
-    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
-    return null;
-  }
-
-  /**
-   * Starts scoring a new fragment: remembers the fragment, zeroes the running
-   * score and forgets which terms were seen in the previous fragment.
-   *
-   * @param newFragment the fragment that subsequent tokens belong to
-   */
-  public void startFragment(TextFragment newFragment) {
-    currentTextFragment = newFragment;
-    totalScore = 0;
-    uniqueTermsInFragment = new HashSet<String>();
-  }
-
-  /**
-   * Scores the current token. The first occurrence of a query term within the
-   * current fragment also adds that term's weight to the fragment score.
-   *
-   * @return the weight of the matching query term, or 0 if the current token
-   *         is not a query term
-   */
-  public float getTokenScore() {
-    final String tokenText = termAtt.toString();
-    final WeightedTerm match = termsToFind.get(tokenText);
-    if (match != null) {
-      // add() reports whether the term was new to this fragment, so each
-      // distinct term contributes to the fragment score only once.
-      if (uniqueTermsInFragment.add(tokenText)) {
-        totalScore += match.getWeight();
-      }
-      return match.getWeight();
-    }
-    // not a query term
-    return 0;
-  }
-
-  /**
-   * @return the summed weights of the distinct query terms seen since the last
-   *         call to {@link #startFragment}
-   */
-  public float getFragmentScore() {
-    return totalScore;
-  }
-
-  /**
-   * Hook invoked once all fragments have been scored.
-   */
-  public void allFragmentsProcessed() {
-    // nothing to do: this scorer keeps no state that needs finalising
-  }
-
-  /**
-   * @return The highest weighted term (useful for passing to GradientFormatter
-   *         to set top end of coloring scale).
-   */
-  public float getMaxTermWeight() {
-    return maxTermWeight;
-  }
-}