--- /dev/null
+package org.apache.lucene.search.highlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+
+/**
+ * {@link Scorer} implementation which scores text fragments by the number of
+ * unique query terms found. This class uses the {@link QueryTermExtractor}
+ * class to determine the query terms and their boosts to be used.
+ */
+// TODO: provide option to boost score of fragments near beginning of document
+// based on fragment.getFragNum()
+public class QueryTermScorer implements Scorer {
+
+ TextFragment currentTextFragment = null;
+ HashSet<String> uniqueTermsInFragment;
+
+ float totalScore = 0;
+ float maxTermWeight = 0;
+ private HashMap<String,WeightedTerm> termsToFind;
+
+ private CharTermAttribute termAtt;
+
+ /**
+ *
+ * @param query a Lucene query (ideally rewritten using query.rewrite before
+ * being passed to this class and the searcher)
+ */
+ public QueryTermScorer(Query query) {
+ this(QueryTermExtractor.getTerms(query));
+ }
+
+ /**
+ *
+ * @param query a Lucene query (ideally rewritten using query.rewrite before
+ * being passed to this class and the searcher)
+ * @param fieldName the Field name which is used to match Query terms
+ */
+ public QueryTermScorer(Query query, String fieldName) {
+ this(QueryTermExtractor.getTerms(query, false, fieldName));
+ }
+
+ /**
+ *
+ * @param query a Lucene query (ideally rewritten using query.rewrite before
+ * being passed to this class and the searcher)
+ * @param reader used to compute IDF which can be used to a) score selected
+ * fragments better b) use graded highlights eg set font color
+ * intensity
+ * @param fieldName the field on which Inverse Document Frequency (IDF)
+ * calculations are based
+ */
+ public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
+ this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
+ }
+
+ public QueryTermScorer(WeightedTerm[] weightedTerms) {
+ termsToFind = new HashMap<String,WeightedTerm>();
+ for (int i = 0; i < weightedTerms.length; i++) {
+ WeightedTerm existingTerm = termsToFind
+ .get(weightedTerms[i].term);
+ if ((existingTerm == null)
+ || (existingTerm.weight < weightedTerms[i].weight)) {
+ // if a term is defined more than once, always use the highest scoring
+ // weight
+ termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
+ maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
+ }
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
+ */
+ public TokenStream init(TokenStream tokenStream) {
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+ return null;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
+ * .lucene.search.highlight.TextFragment)
+ */
+ public void startFragment(TextFragment newFragment) {
+ uniqueTermsInFragment = new HashSet<String>();
+ currentTextFragment = newFragment;
+ totalScore = 0;
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
+ */
+ public float getTokenScore() {
+ String termText = termAtt.toString();
+
+ WeightedTerm queryTerm = termsToFind.get(termText);
+ if (queryTerm == null) {
+ // not a query term - return
+ return 0;
+ }
+ // found a query term - is it unique in this doc?
+ if (!uniqueTermsInFragment.contains(termText)) {
+ totalScore += queryTerm.getWeight();
+ uniqueTermsInFragment.add(termText);
+ }
+ return queryTerm.getWeight();
+ }
+
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
+ */
+ public float getFragmentScore() {
+ return totalScore;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
+ */
+ public void allFragmentsProcessed() {
+ // this class has no special operations to perform at end of processing
+ }
+
+ /**
+ *
+ * @return The highest weighted term (useful for passing to GradientFormatter
+ * to set top end of coloring scale.
+ */
+ public float getMaxTermWeight() {
+ return maxTermWeight;
+ }
+}