+++ /dev/null
-package org.apache.lucene.search.highlight;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.analysis.CachingTokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.memory.MemoryIndex;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.util.StringHelper;
-
-/**
- * {@link Scorer} implementation which scores text fragments by the number of
- * unique query terms found. This class converts appropriate {@link Query}s to
- * {@link SpanQuery}s and attempts to score only those terms that participated in
- * generating the 'hit' on the document.
- * <p>
- * Instances hold per-stream and per-fragment mutable state (current token
- * position, terms already seen in the fragment, running fragment score).
- */
-public class QueryScorer implements Scorer {
-  /** Sum of the weights of the distinct query terms seen in the current fragment. */
-  private float totalScore;
-  /**
-   * Terms already counted towards {@link #totalScore} in the current fragment.
-   * Created eagerly so that {@link #getTokenScore()} does not fail when it is
-   * called before the first {@link #startFragment(TextFragment)}.
-   */
-  private Set<String> foundTerms = new HashSet<String>();
-  /** Weighted terms keyed by term text; looked up once per token. */
-  private Map<String,WeightedSpanTerm> fieldWeightedSpanTerms;
-  /** Highest weight among the terms currently held in {@link #fieldWeightedSpanTerms}. */
-  private float maxTermWeight;
-  /** Position of the current token; -1 until the first token has been scored. */
-  private int position = -1;
-  private String defaultField;
-  private CharTermAttribute termAtt;
-  private PositionIncrementAttribute posIncAtt;
-  private boolean expandMultiTermQuery = true;
-  private Query query;
-  private String field;
-  private IndexReader reader;
-  /** True when the terms were handed in pre-built and no extraction must run. */
-  private boolean skipInitExtractor;
-  private boolean wrapToCaching = true;
-  private int maxCharsToAnalyze;
-
-  /**
-   * @param query Query to use for highlighting
-   */
-  public QueryScorer(Query query) {
-    init(query, null, null, true);
-  }
-
-  /**
-   * @param query Query to use for highlighting
-   * @param field Field to highlight - pass null to ignore fields
-   */
-  public QueryScorer(Query query, String field) {
-    init(query, field, null, true);
-  }
-
-  /**
-   * @param query Query to use for highlighting
-   * @param reader {@link IndexReader} to use for quasi tf/idf scoring
-   * @param field Field to highlight - pass null to ignore fields
-   */
-  public QueryScorer(Query query, IndexReader reader, String field) {
-    init(query, field, reader, true);
-  }
-
-  /**
-   * @param query to use for highlighting
-   * @param reader {@link IndexReader} to use for quasi tf/idf scoring
-   * @param field to highlight - pass null to ignore fields
-   * @param defaultField The default field for queries with the field name
-   *        unspecified - may be null, in which case no default field is used
-   */
-  public QueryScorer(Query query, IndexReader reader, String field, String defaultField) {
-    this.defaultField = internOrNull(defaultField);
-    init(query, field, reader, true);
-  }
-
-  /**
-   * @param query Query to use for highlighting
-   * @param field Field to highlight - pass null to ignore fields
-   * @param defaultField The default field for queries with the field name
-   *        unspecified - may be null, in which case no default field is used
-   */
-  public QueryScorer(Query query, String field, String defaultField) {
-    this.defaultField = internOrNull(defaultField);
-    init(query, field, null, true);
-  }
-
-  /**
-   * Creates a scorer from pre-built terms. No term extraction is performed
-   * when {@link #init(TokenStream)} is later called on this instance.
-   *
-   * @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s
-   */
-  public QueryScorer(WeightedSpanTerm[] weightedTerms) {
-    this.fieldWeightedSpanTerms = new HashMap<String,WeightedSpanTerm>(weightedTerms.length);
-
-    for (WeightedSpanTerm candidate : weightedTerms) {
-      WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(candidate.term);
-
-      // if a term is defined more than once, always use the highest
-      // scoring weight
-      if (existingTerm == null || existingTerm.weight < candidate.weight) {
-        fieldWeightedSpanTerms.put(candidate.term, candidate);
-        maxTermWeight = Math.max(maxTermWeight, candidate.getWeight());
-      }
-    }
-    skipInitExtractor = true;
-  }
-
-  /*
-   * (non-Javadoc)
-   *
-   * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
-   */
-  public float getFragmentScore() {
-    return totalScore;
-  }
-
-  /**
-   * For scorers built from a {@link Query} the terms are only extracted in
-   * {@link #init(TokenStream)}, so this value is 0 until that method has been
-   * called and is recomputed on every call to it.
-   *
-   * @return The highest weighted term (useful for passing to
-   *         GradientFormatter to set top end of coloring scale).
-   */
-  public float getMaxTermWeight() {
-    return maxTermWeight;
-  }
-
-  /*
-   * (non-Javadoc)
-   *
-   * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
-   */
-  public float getTokenScore() {
-    // Advance first: the position must track every token, matched or not.
-    position += posIncAtt.getPositionIncrement();
-    String termText = termAtt.toString();
-
-    WeightedSpanTerm weightedSpanTerm = fieldWeightedSpanTerms.get(termText);
-    if (weightedSpanTerm == null) {
-      return 0;
-    }
-
-    if (weightedSpanTerm.positionSensitive &&
-        !weightedSpanTerm.checkPosition(position)) {
-      return 0;
-    }
-
-    float score = weightedSpanTerm.getWeight();
-
-    // found a query term - it only adds to the fragment score the first
-    // time it is seen in this fragment (Set.add reports whether it was new)
-    if (foundTerms.add(termText)) {
-      totalScore += score;
-    }
-
-    return score;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
-   */
-  public TokenStream init(TokenStream tokenStream) throws IOException {
-    position = -1;
-    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
-    posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
-
-    if (skipInitExtractor) {
-      // terms were supplied up front; there is nothing to extract
-      return null;
-    }
-
-    if (fieldWeightedSpanTerms != null) {
-      fieldWeightedSpanTerms.clear();
-    }
-    return initExtractor(tokenStream);
-  }
-
-  /**
-   * Retrieve the {@link WeightedSpanTerm} for the specified token. Useful for passing
-   * Span information to a {@link Fragmenter}.
-   *
-   * @param token to get {@link WeightedSpanTerm} for
-   * @return WeightedSpanTerm for token, or null if the token is not one of
-   *         the current terms
-   */
-  public WeightedSpanTerm getWeightedSpanTerm(String token) {
-    return fieldWeightedSpanTerms.get(token);
-  }
-
-  /**
-   * Stores the settings shared by all query-based constructors. Term
-   * extraction itself is deferred until {@link #init(TokenStream)}.
-   */
-  private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) {
-    this.reader = reader;
-    this.expandMultiTermQuery = expandMultiTermQuery;
-    this.query = query;
-    this.field = field;
-  }
-
-  /**
-   * Interns a field name, passing null through untouched so that callers may
-   * use null to mean "no default field".
-   */
-  private static String internOrNull(String fieldName) {
-    return fieldName == null ? null : StringHelper.intern(fieldName);
-  }
-
-  /**
-   * Runs a {@link WeightedSpanTermExtractor} over the query to (re)build the
-   * weighted terms, then refreshes {@link #maxTermWeight} from the result.
-   *
-   * @return the extractor's token stream when it reports a cached stream,
-   *         otherwise null
-   */
-  private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
-    WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
-        : new WeightedSpanTermExtractor(defaultField);
-    qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
-    qse.setExpandMultiTermQuery(expandMultiTermQuery);
-    qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
-    if (reader == null) {
-      this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
-          tokenStream, field);
-    } else {
-      this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
-          tokenStream, field, reader);
-    }
-
-    // Previously only the WeightedSpanTerm[] constructor maintained this
-    // value, leaving getMaxTermWeight() at 0 for query-based scorers.
-    float highestWeight = 0;
-    if (fieldWeightedSpanTerms != null) {
-      for (WeightedSpanTerm extracted : fieldWeightedSpanTerms.values()) {
-        highestWeight = Math.max(highestWeight, extracted.getWeight());
-      }
-    }
-    maxTermWeight = highestWeight;
-
-    if (qse.isCachedTokenStream()) {
-      return qse.getTokenStream();
-    }
-
-    return null;
-  }
-
-  /*
-   * (non-Javadoc)
-   *
-   * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
-   */
-  public void startFragment(TextFragment newFragment) {
-    // each fragment is scored independently of the previous one
-    foundTerms = new HashSet<String>();
-    totalScore = 0;
-  }
-
-  /**
-   * @return true if multi-term queries should be expanded
-   */
-  public boolean isExpandMultiTermQuery() {
-    return expandMultiTermQuery;
-  }
-
-  /**
-   * Controls whether or not multi-term queries are expanded
-   * against a {@link MemoryIndex} {@link IndexReader}.
-   *
-   * @param expandMultiTermQuery true if multi-term queries should be expanded
-   */
-  public void setExpandMultiTermQuery(boolean expandMultiTermQuery) {
-    this.expandMultiTermQuery = expandMultiTermQuery;
-  }
-
-  /**
-   * By default, {@link TokenStream}s that are not of the type
-   * {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
-   * ensure an efficient reset - if you are already using a different caching
-   * {@link TokenStream} impl and you don't want it to be wrapped, set this to
-   * false.
-   *
-   * @param wrap false to leave the supplied {@link TokenStream} unwrapped
-   */
-  public void setWrapIfNotCachingTokenFilter(boolean wrap) {
-    this.wrapToCaching = wrap;
-  }
-
-  /**
-   * Sets the limit that is forwarded to the {@link WeightedSpanTermExtractor}
-   * on the next call to {@link #init(TokenStream)}.
-   *
-   * @param maxDocCharsToAnalyze value passed to
-   *        {@link WeightedSpanTermExtractor#setMaxDocCharsToAnalyze(int)}
-   */
-  public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
-    this.maxCharsToAnalyze = maxDocCharsToAnalyze;
-  }
-}