X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java diff --git a/lucene-java-3.4.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene-java-3.4.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java deleted file mode 100644 index 706fb89..0000000 --- a/lucene-java-3.4.0/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java +++ /dev/null @@ -1,273 +0,0 @@ -package org.apache.lucene.search.highlight; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.memory.MemoryIndex; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.util.StringHelper; - -/** - * {@link Scorer} implementation which scores text fragments by the number of - * unique query terms found. This class converts appropriate {@link Query}s to - * {@link SpanQuery}s and attempts to score only those terms that participated in - * generating the 'hit' on the document. - */ -public class QueryScorer implements Scorer { - private float totalScore; - private Set foundTerms; - private Map fieldWeightedSpanTerms; - private float maxTermWeight; - private int position = -1; - private String defaultField; - private CharTermAttribute termAtt; - private PositionIncrementAttribute posIncAtt; - private boolean expandMultiTermQuery = true; - private Query query; - private String field; - private IndexReader reader; - private boolean skipInitExtractor; - private boolean wrapToCaching = true; - private int maxCharsToAnalyze; - - /** - * @param query Query to use for highlighting - */ - public QueryScorer(Query query) { - init(query, null, null, true); - } - - /** - * @param query Query to use for highlighting - * @param field Field to highlight - pass null to ignore fields - */ - public QueryScorer(Query query, String field) { - init(query, field, null, true); - } - - /** - * @param query Query to use for highlighting - * @param field Field to highlight - pass null to ignore fields - * @param reader {@link IndexReader} to use for quasi tf/idf scoring - */ - public QueryScorer(Query query, IndexReader reader, String field) { - init(query, field, reader, true); - } - - - /** - * @param query to use for highlighting - * @param reader {@link IndexReader} to use for quasi tf/idf scoring - * @param field to highlight - pass null to ignore fields - * @param defaultField - */ - public QueryScorer(Query query, IndexReader reader, String field, String defaultField) { - this.defaultField = StringHelper.intern(defaultField); - init(query, field, reader, true); - } - - /** - * @param defaultField - The default field for queries with the field name unspecified - */ - public QueryScorer(Query query, String field, String defaultField) { - this.defaultField = StringHelper.intern(defaultField); - init(query, field, null, true); - } - - /** - * @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s - */ - public QueryScorer(WeightedSpanTerm[] weightedTerms) { - this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length); - - for (int i = 0; i < weightedTerms.length; i++) { - WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(weightedTerms[i].term); - - if ((existingTerm == null) || - (existingTerm.weight < weightedTerms[i].weight)) { - // if a term is defined more than once, always use the highest - // scoring weight - fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]); - maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight()); - } - } - skipInitExtractor = true; - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore() - */ - public float getFragmentScore() { - return totalScore; - } - - /** - * - * @return The highest weighted term (useful for passing to - * GradientFormatter to set top end of coloring scale). - */ - public float getMaxTermWeight() { - return maxTermWeight; - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token, - * int) - */ - public float getTokenScore() { - position += posIncAtt.getPositionIncrement(); - String termText = termAtt.toString(); - - WeightedSpanTerm weightedSpanTerm; - - if ((weightedSpanTerm = fieldWeightedSpanTerms.get( - termText)) == null) { - return 0; - } - - if (weightedSpanTerm.positionSensitive && - !weightedSpanTerm.checkPosition(position)) { - return 0; - } - - float score = weightedSpanTerm.getWeight(); - - // found a query term - is it unique in this doc? - if (!foundTerms.contains(termText)) { - totalScore += score; - foundTerms.add(termText); - } - - return score; - } - - /* (non-Javadoc) - * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream) - */ - public TokenStream init(TokenStream tokenStream) throws IOException { - position = -1; - termAtt = tokenStream.addAttribute(CharTermAttribute.class); - posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); - if(!skipInitExtractor) { - if(fieldWeightedSpanTerms != null) { - fieldWeightedSpanTerms.clear(); - } - return initExtractor(tokenStream); - } - return null; - } - - /** - * Retrieve the {@link WeightedSpanTerm} for the specified token. Useful for passing - * Span information to a {@link Fragmenter}. - * - * @param token to get {@link WeightedSpanTerm} for - * @return WeightedSpanTerm for token - */ - public WeightedSpanTerm getWeightedSpanTerm(String token) { - return fieldWeightedSpanTerms.get(token); - } - - /** - */ - private void init(Query query, String field, IndexReader reader, boolean expandMultiTermQuery) { - this.reader = reader; - this.expandMultiTermQuery = expandMultiTermQuery; - this.query = query; - this.field = field; - } - - private TokenStream initExtractor(TokenStream tokenStream) throws IOException { - WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor() - : new WeightedSpanTermExtractor(defaultField); - qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze); - qse.setExpandMultiTermQuery(expandMultiTermQuery); - qse.setWrapIfNotCachingTokenFilter(wrapToCaching); - if (reader == null) { - this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query, - tokenStream, field); - } else { - this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query, - tokenStream, field, reader); - } - if(qse.isCachedTokenStream()) { - return qse.getTokenStream(); - } - - return null; - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment) - */ - public void startFragment(TextFragment newFragment) { - foundTerms = new HashSet(); - totalScore = 0; - } - - /** - * @return true if multi-term queries should be expanded - */ - public boolean isExpandMultiTermQuery() { - return expandMultiTermQuery; - } - - /** - * Controls whether or not multi-term queries are expanded - * against a {@link MemoryIndex} {@link IndexReader}. - * - * @param expandMultiTermQuery true if multi-term queries should be expanded - */ - public void setExpandMultiTermQuery(boolean expandMultiTermQuery) { - this.expandMultiTermQuery = expandMultiTermQuery; - } - - /** - * By default, {@link TokenStream}s that are not of the type - * {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to - * ensure an efficient reset - if you are already using a different caching - * {@link TokenStream} impl and you don't want it to be wrapped, set this to - * false. - * - * @param wrap - */ - public void setWrapIfNotCachingTokenFilter(boolean wrap) { - this.wrapToCaching = wrap; - } - - public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) { - this.maxCharsToAnalyze = maxDocCharsToAnalyze; - } -}