--- /dev/null
+package org.apache.lucene.search.payloads;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.spans.Spans;
+
+/**
+ * Experimental class to get set of payloads for most standard Lucene queries.
+ * Operates like Highlighter - IndexReader should only contain doc of interest,
+ * best to use MemoryIndex.
+ *
+ * @lucene.experimental
+ *
+ */
+public class PayloadSpanUtil {
+ private IndexReader reader;
+
+ /**
+ * @param reader
+ * that contains doc with payloads to extract
+ */
+ public PayloadSpanUtil(IndexReader reader) {
+ this.reader = reader;
+ }
+
+ /**
+ * Query should be rewritten for wild/fuzzy support.
+ *
+ * @param query
+ * @return payloads Collection
+ * @throws IOException
+ */
+ public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException {
+ Collection<byte[]> payloads = new ArrayList<byte[]>();
+ queryToSpanQuery(query, payloads);
+ return payloads;
+ }
+
+ private void queryToSpanQuery(Query query, Collection<byte[]> payloads)
+ throws IOException {
+ if (query instanceof BooleanQuery) {
+ BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
+
+ for (int i = 0; i < queryClauses.length; i++) {
+ if (!queryClauses[i].isProhibited()) {
+ queryToSpanQuery(queryClauses[i].getQuery(), payloads);
+ }
+ }
+
+ } else if (query instanceof PhraseQuery) {
+ Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
+ SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+ for (int i = 0; i < phraseQueryTerms.length; i++) {
+ clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ }
+
+ int slop = ((PhraseQuery) query).getSlop();
+ boolean inorder = false;
+
+ if (slop == 0) {
+ inorder = true;
+ }
+
+ SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
+ sp.setBoost(query.getBoost());
+ getPayloads(payloads, sp);
+ } else if (query instanceof TermQuery) {
+ SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
+ stq.setBoost(query.getBoost());
+ getPayloads(payloads, stq);
+ } else if (query instanceof SpanQuery) {
+ getPayloads(payloads, (SpanQuery) query);
+ } else if (query instanceof FilteredQuery) {
+ queryToSpanQuery(((FilteredQuery) query).getQuery(), payloads);
+ } else if (query instanceof DisjunctionMaxQuery) {
+
+ for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator
+ .hasNext();) {
+ queryToSpanQuery(iterator.next(), payloads);
+ }
+
+ } else if (query instanceof MultiPhraseQuery) {
+ final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
+ final List<Term[]> termArrays = mpq.getTermArrays();
+ final int[] positions = mpq.getPositions();
+ if (positions.length > 0) {
+
+ int maxPosition = positions[positions.length - 1];
+ for (int i = 0; i < positions.length - 1; ++i) {
+ if (positions[i] > maxPosition) {
+ maxPosition = positions[i];
+ }
+ }
+
+ @SuppressWarnings("unchecked") final List<Query>[] disjunctLists = new List[maxPosition + 1];
+ int distinctPositions = 0;
+
+ for (int i = 0; i < termArrays.size(); ++i) {
+ final Term[] termArray = termArrays.get(i);
+ List<Query> disjuncts = disjunctLists[positions[i]];
+ if (disjuncts == null) {
+ disjuncts = (disjunctLists[positions[i]] = new ArrayList<Query>(
+ termArray.length));
+ ++distinctPositions;
+ }
+ for (final Term term : termArray) {
+ disjuncts.add(new SpanTermQuery(term));
+ }
+ }
+
+ int positionGaps = 0;
+ int position = 0;
+ final SpanQuery[] clauses = new SpanQuery[distinctPositions];
+ for (int i = 0; i < disjunctLists.length; ++i) {
+ List<Query> disjuncts = disjunctLists[i];
+ if (disjuncts != null) {
+ clauses[position++] = new SpanOrQuery(disjuncts
+ .toArray(new SpanQuery[disjuncts.size()]));
+ } else {
+ ++positionGaps;
+ }
+ }
+
+ final int slop = mpq.getSlop();
+ final boolean inorder = (slop == 0);
+
+ SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
+ inorder);
+ sp.setBoost(query.getBoost());
+ getPayloads(payloads, sp);
+ }
+ }
+ }
+
+ private void getPayloads(Collection<byte []> payloads, SpanQuery query)
+ throws IOException {
+ Spans spans = query.getSpans(reader);
+
+ while (spans.next() == true) {
+ if (spans.isPayloadAvailable()) {
+ Collection<byte[]> payload = spans.getPayload();
+ for (byte [] bytes : payload) {
+ payloads.add(bytes);
+ }
+
+ }
+ }
+ }
+}