+++ /dev/null
-package org.apache.lucene.search.vectorhighlight;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
-
-/**
- * FieldQuery breaks down query object into terms/phrases and keep
- * them in QueryPhraseMap structure.
- */
-public class FieldQuery {
-
- final boolean fieldMatch;
-
- // fieldMatch==true, Map<fieldName,QueryPhraseMap>
- // fieldMatch==false, Map<null,QueryPhraseMap>
- Map<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
-
- // fieldMatch==true, Map<fieldName,setOfTermsInQueries>
- // fieldMatch==false, Map<null,setOfTermsInQueries>
- Map<String, Set<String>> termSetMap = new HashMap<String, Set<String>>();
-
- int termOrPhraseNumber; // used for colored tag support
-
- FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ){
- this.fieldMatch = fieldMatch;
- Set<Query> flatQueries = new HashSet<Query>();
- flatten( query, flatQueries );
- saveTerms( flatQueries );
- Collection<Query> expandQueries = expand( flatQueries );
-
- for( Query flatQuery : expandQueries ){
- QueryPhraseMap rootMap = getRootMap( flatQuery );
- rootMap.add( flatQuery );
- if( !phraseHighlight && flatQuery instanceof PhraseQuery ){
- PhraseQuery pq = (PhraseQuery)flatQuery;
- if( pq.getTerms().length > 1 ){
- for( Term term : pq.getTerms() )
- rootMap.addTerm( term, flatQuery.getBoost() );
- }
- }
- }
- }
-
- void flatten( Query sourceQuery, Collection<Query> flatQueries ){
- if( sourceQuery instanceof BooleanQuery ){
- BooleanQuery bq = (BooleanQuery)sourceQuery;
- for( BooleanClause clause : bq.getClauses() ){
- if( !clause.isProhibited() )
- flatten( clause.getQuery(), flatQueries );
- }
- }
- else if( sourceQuery instanceof DisjunctionMaxQuery ){
- DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
- for( Query query : dmq ){
- flatten( query, flatQueries );
- }
- }
- else if( sourceQuery instanceof TermQuery ){
- if( !flatQueries.contains( sourceQuery ) )
- flatQueries.add( sourceQuery );
- }
- else if( sourceQuery instanceof PhraseQuery ){
- if( !flatQueries.contains( sourceQuery ) ){
- PhraseQuery pq = (PhraseQuery)sourceQuery;
- if( pq.getTerms().length > 1 )
- flatQueries.add( pq );
- else if( pq.getTerms().length == 1 ){
- flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
- }
- }
- }
- // else discard queries
- }
-
- /*
- * Create expandQueries from flatQueries.
- *
- * expandQueries := flatQueries + overlapped phrase queries
- *
- * ex1) flatQueries={a,b,c}
- * => expandQueries={a,b,c}
- * ex2) flatQueries={a,"b c","c d"}
- * => expandQueries={a,"b c","c d","b c d"}
- */
- Collection<Query> expand( Collection<Query> flatQueries ){
- Set<Query> expandQueries = new HashSet<Query>();
- for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
- Query query = i.next();
- i.remove();
- expandQueries.add( query );
- if( !( query instanceof PhraseQuery ) ) continue;
- for( Iterator<Query> j = flatQueries.iterator(); j.hasNext(); ){
- Query qj = j.next();
- if( !( qj instanceof PhraseQuery ) ) continue;
- checkOverlap( expandQueries, (PhraseQuery)query, (PhraseQuery)qj );
- }
- }
- return expandQueries;
- }
-
- /*
- * Check if PhraseQuery A and B have overlapped part.
- *
- * ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
- * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
- * ex3) A="a b", B="c d" => no overlap; expandQueries={}
- */
- private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, PhraseQuery b ){
- if( a.getSlop() != b.getSlop() ) return;
- Term[] ats = a.getTerms();
- Term[] bts = b.getTerms();
- if( fieldMatch && !ats[0].field().equals( bts[0].field() ) ) return;
- checkOverlap( expandQueries, ats, bts, a.getSlop(), a.getBoost() );
- checkOverlap( expandQueries, bts, ats, b.getSlop(), b.getBoost() );
- }
-
- /*
- * Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
- *
- * ex1) src="a b", dest="c d" => no overlap
- * ex2) src="a b", dest="a b c" => no overlap
- * ex3) src="a b", dest="b c" => overlap; expandQueries={"a b c"}
- * ex4) src="a b c", dest="b c d" => overlap; expandQueries={"a b c d"}
- * ex5) src="a b c", dest="b c" => no overlap
- * ex6) src="a b c", dest="b" => no overlap
- * ex7) src="a a a a", dest="a a a" => overlap;
- * expandQueries={"a a a a a","a a a a a a"}
- * ex8) src="a b c d", dest="b c" => no overlap
- */
- private void checkOverlap( Collection<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost ){
- // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
- // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
- // converts PhraseQuery to TermQuery)
- for( int i = 1; i < src.length; i++ ){
- boolean overlap = true;
- for( int j = i; j < src.length; j++ ){
- if( ( j - i ) < dest.length && !src[j].text().equals( dest[j-i].text() ) ){
- overlap = false;
- break;
- }
- }
- if( overlap && src.length - i < dest.length ){
- PhraseQuery pq = new PhraseQuery();
- for( Term srcTerm : src )
- pq.add( srcTerm );
- for( int k = src.length - i; k < dest.length; k++ ){
- pq.add( new Term( src[0].field(), dest[k].text() ) );
- }
- pq.setSlop( slop );
- pq.setBoost( boost );
- if(!expandQueries.contains( pq ) )
- expandQueries.add( pq );
- }
- }
- }
-
- QueryPhraseMap getRootMap( Query query ){
- String key = getKey( query );
- QueryPhraseMap map = rootMaps.get( key );
- if( map == null ){
- map = new QueryPhraseMap( this );
- rootMaps.put( key, map );
- }
- return map;
- }
-
- /*
- * Return 'key' string. 'key' is the field name of the Query.
- * If not fieldMatch, 'key' will be null.
- */
- private String getKey( Query query ){
- if( !fieldMatch ) return null;
- if( query instanceof TermQuery )
- return ((TermQuery)query).getTerm().field();
- else if ( query instanceof PhraseQuery ){
- PhraseQuery pq = (PhraseQuery)query;
- Term[] terms = pq.getTerms();
- return terms[0].field();
- }
- else
- throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
- }
-
- /*
- * Save the set of terms in the queries to termSetMap.
- *
- * ex1) q=name:john
- * - fieldMatch==true
- * termSetMap=Map<"name",Set<"john">>
- * - fieldMatch==false
- * termSetMap=Map<null,Set<"john">>
- *
- * ex2) q=name:john title:manager
- * - fieldMatch==true
- * termSetMap=Map<"name",Set<"john">,
- * "title",Set<"manager">>
- * - fieldMatch==false
- * termSetMap=Map<null,Set<"john","manager">>
- *
- * ex3) q=name:"john lennon"
- * - fieldMatch==true
- * termSetMap=Map<"name",Set<"john","lennon">>
- * - fieldMatch==false
- * termSetMap=Map<null,Set<"john","lennon">>
- */
- void saveTerms( Collection<Query> flatQueries ){
- for( Query query : flatQueries ){
- Set<String> termSet = getTermSet( query );
- if( query instanceof TermQuery )
- termSet.add( ((TermQuery)query).getTerm().text() );
- else if( query instanceof PhraseQuery ){
- for( Term term : ((PhraseQuery)query).getTerms() )
- termSet.add( term.text() );
- }
- else
- throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
- }
- }
-
- private Set<String> getTermSet( Query query ){
- String key = getKey( query );
- Set<String> set = termSetMap.get( key );
- if( set == null ){
- set = new HashSet<String>();
- termSetMap.put( key, set );
- }
- return set;
- }
-
- Set<String> getTermSet( String field ){
- return termSetMap.get( fieldMatch ? field : null );
- }
-
- /**
- *
- * @param fieldName
- * @param term
- * @return QueryPhraseMap
- */
- public QueryPhraseMap getFieldTermMap( String fieldName, String term ){
- QueryPhraseMap rootMap = getRootMap( fieldName );
- return rootMap == null ? null : rootMap.subMap.get( term );
- }
-
- /**
- *
- * @param fieldName
- * @param phraseCandidate
- * @return QueryPhraseMap
- */
- public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){
- QueryPhraseMap root = getRootMap( fieldName );
- if( root == null ) return null;
- return root.searchPhrase( phraseCandidate );
- }
-
- private QueryPhraseMap getRootMap( String fieldName ){
- return rootMaps.get( fieldMatch ? fieldName : null );
- }
-
- int nextTermOrPhraseNumber(){
- return termOrPhraseNumber++;
- }
-
- public static class QueryPhraseMap {
-
- boolean terminal;
- int slop; // valid if terminal == true and phraseHighlight == true
- float boost; // valid if terminal == true
- int termOrPhraseNumber; // valid if terminal == true
- FieldQuery fieldQuery;
- Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
-
- public QueryPhraseMap( FieldQuery fieldQuery ){
- this.fieldQuery = fieldQuery;
- }
-
- void addTerm( Term term, float boost ){
- QueryPhraseMap map = getOrNewMap( subMap, term.text() );
- map.markTerminal( boost );
- }
-
- private QueryPhraseMap getOrNewMap( Map<String, QueryPhraseMap> subMap, String term ){
- QueryPhraseMap map = subMap.get( term );
- if( map == null ){
- map = new QueryPhraseMap( fieldQuery );
- subMap.put( term, map );
- }
- return map;
- }
-
- void add( Query query ){
- if( query instanceof TermQuery ){
- addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
- }
- else if( query instanceof PhraseQuery ){
- PhraseQuery pq = (PhraseQuery)query;
- Term[] terms = pq.getTerms();
- Map<String, QueryPhraseMap> map = subMap;
- QueryPhraseMap qpm = null;
- for( Term term : terms ){
- qpm = getOrNewMap( map, term.text() );
- map = qpm.subMap;
- }
- qpm.markTerminal( pq.getSlop(), pq.getBoost() );
- }
- else
- throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
- }
-
- public QueryPhraseMap getTermMap( String term ){
- return subMap.get( term );
- }
-
- private void markTerminal( float boost ){
- markTerminal( 0, boost );
- }
-
- private void markTerminal( int slop, float boost ){
- this.terminal = true;
- this.slop = slop;
- this.boost = boost;
- this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
- }
-
- public boolean isTerminal(){
- return terminal;
- }
-
- public int getSlop(){
- return slop;
- }
-
- public float getBoost(){
- return boost;
- }
-
- public int getTermOrPhraseNumber(){
- return termOrPhraseNumber;
- }
-
- public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
- QueryPhraseMap currMap = this;
- for( TermInfo ti : phraseCandidate ){
- currMap = currMap.subMap.get( ti.getText() );
- if( currMap == null ) return null;
- }
- return currMap.isValidTermOrPhrase( phraseCandidate ) ? currMap : null;
- }
-
- public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
- // check terminal
- if( !terminal ) return false;
-
- // if the candidate is a term, it is valid
- if( phraseCandidate.size() == 1 ) return true;
-
- // else check whether the candidate is valid phrase
- // compare position-gaps between terms to slop
- int pos = phraseCandidate.get( 0 ).getPosition();
- for( int i = 1; i < phraseCandidate.size(); i++ ){
- int nextPos = phraseCandidate.get( i ).getPosition();
- if( Math.abs( nextPos - pos - 1 ) > slop ) return false;
- pos = nextPos;
- }
- return true;
- }
- }
-}