1 package org.apache.lucene.search.vectorhighlight;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import java.util.Collection;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Iterator;
23 import java.util.List;
27 import org.apache.lucene.index.Term;
28 import org.apache.lucene.search.BooleanClause;
29 import org.apache.lucene.search.BooleanQuery;
30 import org.apache.lucene.search.DisjunctionMaxQuery;
31 import org.apache.lucene.search.PhraseQuery;
32 import org.apache.lucene.search.Query;
33 import org.apache.lucene.search.TermQuery;
34 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
37 * FieldQuery breaks down query object into terms/phrases and keep
38 * them in QueryPhraseMap structure.
40 public class FieldQuery {
42 final boolean fieldMatch;
44 // fieldMatch==true, Map<fieldName,QueryPhraseMap>
45 // fieldMatch==false, Map<null,QueryPhraseMap>
46 Map<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
48 // fieldMatch==true, Map<fieldName,setOfTermsInQueries>
49 // fieldMatch==false, Map<null,setOfTermsInQueries>
50 Map<String, Set<String>> termSetMap = new HashMap<String, Set<String>>();
52 int termOrPhraseNumber; // used for colored tag support
54 FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ){
55 this.fieldMatch = fieldMatch;
56 Set<Query> flatQueries = new HashSet<Query>();
57 flatten( query, flatQueries );
58 saveTerms( flatQueries );
59 Collection<Query> expandQueries = expand( flatQueries );
61 for( Query flatQuery : expandQueries ){
62 QueryPhraseMap rootMap = getRootMap( flatQuery );
63 rootMap.add( flatQuery );
64 if( !phraseHighlight && flatQuery instanceof PhraseQuery ){
65 PhraseQuery pq = (PhraseQuery)flatQuery;
66 if( pq.getTerms().length > 1 ){
67 for( Term term : pq.getTerms() )
68 rootMap.addTerm( term, flatQuery.getBoost() );
74 void flatten( Query sourceQuery, Collection<Query> flatQueries ){
75 if( sourceQuery instanceof BooleanQuery ){
76 BooleanQuery bq = (BooleanQuery)sourceQuery;
77 for( BooleanClause clause : bq.getClauses() ){
78 if( !clause.isProhibited() )
79 flatten( clause.getQuery(), flatQueries );
82 else if( sourceQuery instanceof DisjunctionMaxQuery ){
83 DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
84 for( Query query : dmq ){
85 flatten( query, flatQueries );
88 else if( sourceQuery instanceof TermQuery ){
89 if( !flatQueries.contains( sourceQuery ) )
90 flatQueries.add( sourceQuery );
92 else if( sourceQuery instanceof PhraseQuery ){
93 if( !flatQueries.contains( sourceQuery ) ){
94 PhraseQuery pq = (PhraseQuery)sourceQuery;
95 if( pq.getTerms().length > 1 )
96 flatQueries.add( pq );
97 else if( pq.getTerms().length == 1 ){
98 flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
102 // else discard queries
106 * Create expandQueries from flatQueries.
108 * expandQueries := flatQueries + overlapped phrase queries
110 * ex1) flatQueries={a,b,c}
111 * => expandQueries={a,b,c}
112 * ex2) flatQueries={a,"b c","c d"}
113 * => expandQueries={a,"b c","c d","b c d"}
115 Collection<Query> expand( Collection<Query> flatQueries ){
116 Set<Query> expandQueries = new HashSet<Query>();
117 for( Iterator<Query> i = flatQueries.iterator(); i.hasNext(); ){
118 Query query = i.next();
120 expandQueries.add( query );
121 if( !( query instanceof PhraseQuery ) ) continue;
122 for( Iterator<Query> j = flatQueries.iterator(); j.hasNext(); ){
124 if( !( qj instanceof PhraseQuery ) ) continue;
125 checkOverlap( expandQueries, (PhraseQuery)query, (PhraseQuery)qj );
128 return expandQueries;
132 * Check if PhraseQuery A and B have overlapped part.
134 * ex1) A="a b", B="b c" => overlap; expandQueries={"a b c"}
135 * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
136 * ex3) A="a b", B="c d" => no overlap; expandQueries={}
138 private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, PhraseQuery b ){
139 if( a.getSlop() != b.getSlop() ) return;
140 Term[] ats = a.getTerms();
141 Term[] bts = b.getTerms();
142 if( fieldMatch && !ats[0].field().equals( bts[0].field() ) ) return;
143 checkOverlap( expandQueries, ats, bts, a.getSlop(), a.getBoost() );
144 checkOverlap( expandQueries, bts, ats, b.getSlop(), b.getBoost() );
148 * Check if src and dest have overlapped part and if it is, create PhraseQueries and add expandQueries.
150 * ex1) src="a b", dest="c d" => no overlap
151 * ex2) src="a b", dest="a b c" => no overlap
152 * ex3) src="a b", dest="b c" => overlap; expandQueries={"a b c"}
153 * ex4) src="a b c", dest="b c d" => overlap; expandQueries={"a b c d"}
154 * ex5) src="a b c", dest="b c" => no overlap
155 * ex6) src="a b c", dest="b" => no overlap
156 * ex7) src="a a a a", dest="a a a" => overlap;
157 * expandQueries={"a a a a a","a a a a a a"}
158 * ex8) src="a b c d", dest="b c" => no overlap
160 private void checkOverlap( Collection<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost ){
161 // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
162 // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
163 // converts PhraseQuery to TermQuery)
164 for( int i = 1; i < src.length; i++ ){
165 boolean overlap = true;
166 for( int j = i; j < src.length; j++ ){
167 if( ( j - i ) < dest.length && !src[j].text().equals( dest[j-i].text() ) ){
172 if( overlap && src.length - i < dest.length ){
173 PhraseQuery pq = new PhraseQuery();
174 for( Term srcTerm : src )
176 for( int k = src.length - i; k < dest.length; k++ ){
177 pq.add( new Term( src[0].field(), dest[k].text() ) );
180 pq.setBoost( boost );
181 if(!expandQueries.contains( pq ) )
182 expandQueries.add( pq );
187 QueryPhraseMap getRootMap( Query query ){
188 String key = getKey( query );
189 QueryPhraseMap map = rootMaps.get( key );
191 map = new QueryPhraseMap( this );
192 rootMaps.put( key, map );
198 * Return 'key' string. 'key' is the field name of the Query.
199 * If not fieldMatch, 'key' will be null.
201 private String getKey( Query query ){
202 if( !fieldMatch ) return null;
203 if( query instanceof TermQuery )
204 return ((TermQuery)query).getTerm().field();
205 else if ( query instanceof PhraseQuery ){
206 PhraseQuery pq = (PhraseQuery)query;
207 Term[] terms = pq.getTerms();
208 return terms[0].field();
211 throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
215 * Save the set of terms in the queries to termSetMap.
219 * termSetMap=Map<"name",Set<"john">>
220 * - fieldMatch==false
221 * termSetMap=Map<null,Set<"john">>
223 * ex2) q=name:john title:manager
225 * termSetMap=Map<"name",Set<"john">,
226 * "title",Set<"manager">>
227 * - fieldMatch==false
228 * termSetMap=Map<null,Set<"john","manager">>
230 * ex3) q=name:"john lennon"
232 * termSetMap=Map<"name",Set<"john","lennon">>
233 * - fieldMatch==false
234 * termSetMap=Map<null,Set<"john","lennon">>
236 void saveTerms( Collection<Query> flatQueries ){
237 for( Query query : flatQueries ){
238 Set<String> termSet = getTermSet( query );
239 if( query instanceof TermQuery )
240 termSet.add( ((TermQuery)query).getTerm().text() );
241 else if( query instanceof PhraseQuery ){
242 for( Term term : ((PhraseQuery)query).getTerms() )
243 termSet.add( term.text() );
246 throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
250 private Set<String> getTermSet( Query query ){
251 String key = getKey( query );
252 Set<String> set = termSetMap.get( key );
254 set = new HashSet<String>();
255 termSetMap.put( key, set );
260 Set<String> getTermSet( String field ){
261 return termSetMap.get( fieldMatch ? field : null );
268 * @return QueryPhraseMap
270 public QueryPhraseMap getFieldTermMap( String fieldName, String term ){
271 QueryPhraseMap rootMap = getRootMap( fieldName );
272 return rootMap == null ? null : rootMap.subMap.get( term );
278 * @param phraseCandidate
279 * @return QueryPhraseMap
281 public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){
282 QueryPhraseMap root = getRootMap( fieldName );
283 if( root == null ) return null;
284 return root.searchPhrase( phraseCandidate );
287 private QueryPhraseMap getRootMap( String fieldName ){
288 return rootMaps.get( fieldMatch ? fieldName : null );
291 int nextTermOrPhraseNumber(){
292 return termOrPhraseNumber++;
295 public static class QueryPhraseMap {
298 int slop; // valid if terminal == true and phraseHighlight == true
299 float boost; // valid if terminal == true
300 int termOrPhraseNumber; // valid if terminal == true
301 FieldQuery fieldQuery;
302 Map<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
304 public QueryPhraseMap( FieldQuery fieldQuery ){
305 this.fieldQuery = fieldQuery;
308 void addTerm( Term term, float boost ){
309 QueryPhraseMap map = getOrNewMap( subMap, term.text() );
310 map.markTerminal( boost );
313 private QueryPhraseMap getOrNewMap( Map<String, QueryPhraseMap> subMap, String term ){
314 QueryPhraseMap map = subMap.get( term );
316 map = new QueryPhraseMap( fieldQuery );
317 subMap.put( term, map );
322 void add( Query query ){
323 if( query instanceof TermQuery ){
324 addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
326 else if( query instanceof PhraseQuery ){
327 PhraseQuery pq = (PhraseQuery)query;
328 Term[] terms = pq.getTerms();
329 Map<String, QueryPhraseMap> map = subMap;
330 QueryPhraseMap qpm = null;
331 for( Term term : terms ){
332 qpm = getOrNewMap( map, term.text() );
335 qpm.markTerminal( pq.getSlop(), pq.getBoost() );
338 throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
341 public QueryPhraseMap getTermMap( String term ){
342 return subMap.get( term );
345 private void markTerminal( float boost ){
346 markTerminal( 0, boost );
349 private void markTerminal( int slop, float boost ){
350 this.terminal = true;
353 this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber();
356 public boolean isTerminal(){
360 public int getSlop(){
364 public float getBoost(){
368 public int getTermOrPhraseNumber(){
369 return termOrPhraseNumber;
372 public QueryPhraseMap searchPhrase( final List<TermInfo> phraseCandidate ){
373 QueryPhraseMap currMap = this;
374 for( TermInfo ti : phraseCandidate ){
375 currMap = currMap.subMap.get( ti.getText() );
376 if( currMap == null ) return null;
378 return currMap.isValidTermOrPhrase( phraseCandidate ) ? currMap : null;
381 public boolean isValidTermOrPhrase( final List<TermInfo> phraseCandidate ){
383 if( !terminal ) return false;
385 // if the candidate is a term, it is valid
386 if( phraseCandidate.size() == 1 ) return true;
388 // else check whether the candidate is valid phrase
389 // compare position-gaps between terms to slop
390 int pos = phraseCandidate.get( 0 ).getPosition();
391 for( int i = 1; i < phraseCandidate.size(); i++ ){
392 int nextPos = phraseCandidate.get( i ).getPosition();
393 if( Math.abs( nextPos - pos - 1 ) > slop ) return false;