1 package org.apache.lucene.search.payloads;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Collection;
23 import java.util.Iterator;
24 import java.util.List;
26 import org.apache.lucene.index.IndexReader;
27 import org.apache.lucene.index.Term;
28 import org.apache.lucene.search.BooleanClause;
29 import org.apache.lucene.search.BooleanQuery;
30 import org.apache.lucene.search.DisjunctionMaxQuery;
31 import org.apache.lucene.search.FilteredQuery;
32 import org.apache.lucene.search.MultiPhraseQuery;
33 import org.apache.lucene.search.PhraseQuery;
34 import org.apache.lucene.search.Query;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.lucene.search.spans.SpanNearQuery;
37 import org.apache.lucene.search.spans.SpanOrQuery;
38 import org.apache.lucene.search.spans.SpanQuery;
39 import org.apache.lucene.search.spans.SpanTermQuery;
40 import org.apache.lucene.search.spans.Spans;
/**
 * Experimental class to get the set of payloads for most standard Lucene
 * queries. Operates like Highlighter - the IndexReader should only contain the
 * doc of interest, so it is best to use MemoryIndex.
 *
 * @lucene.experimental
 */
50 public class PayloadSpanUtil {
51 private IndexReader reader;
55 * that contains doc with payloads to extract
57 public PayloadSpanUtil(IndexReader reader) {
62 * Query should be rewritten for wild/fuzzy support.
65 * @return payloads Collection
68 public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException {
69 Collection<byte[]> payloads = new ArrayList<byte[]>();
70 queryToSpanQuery(query, payloads);
74 private void queryToSpanQuery(Query query, Collection<byte[]> payloads)
76 if (query instanceof BooleanQuery) {
77 BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
79 for (int i = 0; i < queryClauses.length; i++) {
80 if (!queryClauses[i].isProhibited()) {
81 queryToSpanQuery(queryClauses[i].getQuery(), payloads);
85 } else if (query instanceof PhraseQuery) {
86 Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
87 SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
88 for (int i = 0; i < phraseQueryTerms.length; i++) {
89 clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
92 int slop = ((PhraseQuery) query).getSlop();
93 boolean inorder = false;
99 SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
100 sp.setBoost(query.getBoost());
101 getPayloads(payloads, sp);
102 } else if (query instanceof TermQuery) {
103 SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
104 stq.setBoost(query.getBoost());
105 getPayloads(payloads, stq);
106 } else if (query instanceof SpanQuery) {
107 getPayloads(payloads, (SpanQuery) query);
108 } else if (query instanceof FilteredQuery) {
109 queryToSpanQuery(((FilteredQuery) query).getQuery(), payloads);
110 } else if (query instanceof DisjunctionMaxQuery) {
112 for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator
114 queryToSpanQuery(iterator.next(), payloads);
117 } else if (query instanceof MultiPhraseQuery) {
118 final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
119 final List<Term[]> termArrays = mpq.getTermArrays();
120 final int[] positions = mpq.getPositions();
121 if (positions.length > 0) {
123 int maxPosition = positions[positions.length - 1];
124 for (int i = 0; i < positions.length - 1; ++i) {
125 if (positions[i] > maxPosition) {
126 maxPosition = positions[i];
130 @SuppressWarnings("unchecked") final List<Query>[] disjunctLists = new List[maxPosition + 1];
131 int distinctPositions = 0;
133 for (int i = 0; i < termArrays.size(); ++i) {
134 final Term[] termArray = termArrays.get(i);
135 List<Query> disjuncts = disjunctLists[positions[i]];
136 if (disjuncts == null) {
137 disjuncts = (disjunctLists[positions[i]] = new ArrayList<Query>(
141 for (final Term term : termArray) {
142 disjuncts.add(new SpanTermQuery(term));
146 int positionGaps = 0;
148 final SpanQuery[] clauses = new SpanQuery[distinctPositions];
149 for (int i = 0; i < disjunctLists.length; ++i) {
150 List<Query> disjuncts = disjunctLists[i];
151 if (disjuncts != null) {
152 clauses[position++] = new SpanOrQuery(disjuncts
153 .toArray(new SpanQuery[disjuncts.size()]));
159 final int slop = mpq.getSlop();
160 final boolean inorder = (slop == 0);
162 SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
164 sp.setBoost(query.getBoost());
165 getPayloads(payloads, sp);
170 private void getPayloads(Collection<byte []> payloads, SpanQuery query)
172 Spans spans = query.getSpans(reader);
174 while (spans.next() == true) {
175 if (spans.isPayloadAvailable()) {
176 Collection<byte[]> payload = spans.getPayload();
177 for (byte [] bytes : payload) {