1 package org.apache.lucene.search.join;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
23 import org.apache.lucene.index.IndexReader;
24 import org.apache.lucene.index.IndexWriter; // javadocs
25 import org.apache.lucene.index.Term;
26 import org.apache.lucene.search.BooleanClause;
27 import org.apache.lucene.search.DocIdSet;
28 import org.apache.lucene.search.DocIdSetIterator;
29 import org.apache.lucene.search.Explanation;
30 import org.apache.lucene.search.Filter;
31 import org.apache.lucene.search.Query;
32 import org.apache.lucene.search.Scorer;
33 import org.apache.lucene.search.Searcher;
34 import org.apache.lucene.search.Weight;
35 import org.apache.lucene.search.grouping.TopGroups;
36 import org.apache.lucene.util.ArrayUtil;
37 import org.apache.lucene.util.FixedBitSet;
40 * This query requires that you index
41 * children and parent docs as a single block, using the
42 * {@link IndexWriter#addDocuments} or {@link
43 * IndexWriter#updateDocuments} API. In each block, the
44 * child documents must appear first, ending with the parent
45 * document. At search time you provide a Filter
46 * identifying the parents, however this Filter must provide
47 * an {@link FixedBitSet} per sub-reader.
49 * <p>Once the block index is built, use this query to wrap
50 * any sub-query matching only child docs and join matches in that
51 * child document space up to the parent document space.
52 * You can then use this Query as a clause with
53 * other queries in the parent document space.</p>
55 * <p>The child documents must be orthogonal to the parent
56 * documents: the wrapped child query must never
57 * return a parent document.</p>
59 * If you'd like to retrieve {@link TopGroups} for the
60 * resulting query, use the {@link BlockJoinCollector}.
61 * Note that this is not necessary, ie, if you simply want
62 * to collect the parent documents and don't need to see
63 * which child documents matched under that parent, then
64 * you can use any collector.
66 * <p><b>NOTE</b>: If the overall query contains parent-only
67 * matches, for example you OR a parent-only query with a
68 * joined child-only query, then the resulting collected documents
69 * will be correct, however the {@link TopGroups} you get
70 * from {@link BlockJoinCollector} will not contain every
71 * child for parents that had matched.
73 * <p>See {@link org.apache.lucene.search.join} for an
76 * @lucene.experimental
79 public class BlockJoinQuery extends Query {
81 public static enum ScoreMode {None, Avg, Max, Total};
83 private final Filter parentsFilter;
84 private final Query childQuery;
86 // If we are rewritten, this is the original childQuery we
87 // were passed; we use this for .equals() and
88 // .hashCode(). This makes rewritten query equal the
89 // original, so that user does not have to .rewrite() their
90 // query before searching:
91 private final Query origChildQuery;
92 private final ScoreMode scoreMode;
94 public BlockJoinQuery(Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
96 this.origChildQuery = childQuery;
97 this.childQuery = childQuery;
98 this.parentsFilter = parentsFilter;
99 this.scoreMode = scoreMode;
102 private BlockJoinQuery(Query origChildQuery, Query childQuery, Filter parentsFilter, ScoreMode scoreMode) {
104 this.origChildQuery = origChildQuery;
105 this.childQuery = childQuery;
106 this.parentsFilter = parentsFilter;
107 this.scoreMode = scoreMode;
111 public Weight createWeight(Searcher searcher) throws IOException {
112 return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
115 private static class BlockJoinWeight extends Weight {
116 private final Query joinQuery;
117 private final Weight childWeight;
118 private final Filter parentsFilter;
119 private final ScoreMode scoreMode;
121 public BlockJoinWeight(Query joinQuery, Weight childWeight, Filter parentsFilter, ScoreMode scoreMode) {
123 this.joinQuery = joinQuery;
124 this.childWeight = childWeight;
125 this.parentsFilter = parentsFilter;
126 this.scoreMode = scoreMode;
130 public Query getQuery() {
135 public float getValue() {
136 return childWeight.getValue();
140 public float sumOfSquaredWeights() throws IOException {
141 return childWeight.sumOfSquaredWeights() * joinQuery.getBoost() * joinQuery.getBoost();
145 public void normalize(float norm) {
146 childWeight.normalize(norm * joinQuery.getBoost());
150 public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
151 // Pass scoreDocsInOrder true, topScorer false to our sub:
152 final Scorer childScorer = childWeight.scorer(reader, true, false);
154 if (childScorer == null) {
159 final int firstChildDoc = childScorer.nextDoc();
160 if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
165 final DocIdSet parents = parentsFilter.getDocIdSet(reader);
166 // TODO: once we do random-access filters we can
168 if (parents == null) {
172 if (!(parents instanceof FixedBitSet)) {
173 throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
176 return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
180 public Explanation explain(IndexReader reader, int doc) throws IOException {
182 throw new UnsupportedOperationException(getClass().getName() +
183 " cannot explain match on parent document");
187 public boolean scoresDocsOutOfOrder() {
192 static class BlockJoinScorer extends Scorer {
193 private final Scorer childScorer;
194 private final FixedBitSet parentBits;
195 private final ScoreMode scoreMode;
196 private int parentDoc;
197 private float parentScore;
198 private int nextChildDoc;
200 private int[] pendingChildDocs = new int[5];
201 private float[] pendingChildScores;
202 private int childDocUpto;
204 public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
206 //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
207 this.parentBits = parentBits;
208 this.childScorer = childScorer;
209 this.scoreMode = scoreMode;
210 if (scoreMode != ScoreMode.None) {
211 pendingChildScores = new float[5];
213 nextChildDoc = firstChildDoc;
217 public void visitSubScorers(Query parent, BooleanClause.Occur relationship,
218 ScorerVisitor<Query, Query, Scorer> visitor) {
219 super.visitSubScorers(parent, relationship, visitor);
220 //childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor);
221 childScorer.visitScorers(visitor);
224 int getChildCount() {
228 int[] swapChildDocs(int[] other) {
229 final int[] ret = pendingChildDocs;
231 pendingChildDocs = new int[5];
233 pendingChildDocs = other;
238 float[] swapChildScores(float[] other) {
239 if (scoreMode == ScoreMode.None) {
240 throw new IllegalStateException("ScoreMode is None");
242 final float[] ret = pendingChildScores;
244 pendingChildScores = new float[5];
246 pendingChildScores = other;
252 public int nextDoc() throws IOException {
253 //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
255 if (nextChildDoc == NO_MORE_DOCS) {
256 //System.out.println(" end");
257 return parentDoc = NO_MORE_DOCS;
260 // Gather all children sharing the same parent as nextChildDoc
261 parentDoc = parentBits.nextSetBit(nextChildDoc);
262 //System.out.println(" parentDoc=" + parentDoc);
263 assert parentDoc != -1;
265 float totalScore = 0;
266 float maxScore = Float.NEGATIVE_INFINITY;
270 //System.out.println(" c=" + nextChildDoc);
271 if (pendingChildDocs.length == childDocUpto) {
272 pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
273 if (scoreMode != ScoreMode.None) {
274 pendingChildScores = ArrayUtil.grow(pendingChildScores);
277 pendingChildDocs[childDocUpto] = nextChildDoc;
278 if (scoreMode != ScoreMode.None) {
279 // TODO: specialize this into dedicated classes per-scoreMode
280 final float childScore = childScorer.score();
281 pendingChildScores[childDocUpto] = childScore;
282 maxScore = Math.max(childScore, maxScore);
283 totalScore += childScore;
286 nextChildDoc = childScorer.nextDoc();
287 } while (nextChildDoc < parentDoc);
288 //System.out.println(" nextChildDoc=" + nextChildDoc);
290 // Parent & child docs are supposed to be orthogonal:
291 assert nextChildDoc != parentDoc;
295 parentScore = totalScore / childDocUpto;
298 parentScore = maxScore;
301 parentScore = totalScore;
307 //System.out.println(" return parentDoc=" + parentDoc);
317 public float score() throws IOException {
322 public int advance(int parentTarget) throws IOException {
324 //System.out.println("Q.advance parentTarget=" + parentTarget);
325 if (parentTarget == NO_MORE_DOCS) {
326 return parentDoc = NO_MORE_DOCS;
329 // Every parent must have at least one child:
330 assert parentTarget != 0;
332 final int prevParentDoc = parentBits.prevSetBit(parentTarget-1);
334 //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
335 assert prevParentDoc >= parentDoc;
336 if (prevParentDoc > nextChildDoc) {
337 nextChildDoc = childScorer.advance(prevParentDoc);
338 // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
340 //System.out.println(" skip childScorer advance");
343 // Parent & child docs are supposed to be orthogonal:
344 assert nextChildDoc != prevParentDoc;
346 final int nd = nextDoc();
347 //System.out.println(" return nextParentDoc=" + nd);
353 public void extractTerms(Set<Term> terms) {
354 childQuery.extractTerms(terms);
358 public Query rewrite(IndexReader reader) throws IOException {
359 final Query childRewrite = childQuery.rewrite(reader);
360 if (childRewrite != childQuery) {
361 Query rewritten = new BlockJoinQuery(childQuery,
365 rewritten.setBoost(getBoost());
373 public String toString(String field) {
374 return "BlockJoinQuery ("+childQuery.toString()+")";
378 public boolean equals(Object _other) {
379 if (_other instanceof BlockJoinQuery) {
380 final BlockJoinQuery other = (BlockJoinQuery) _other;
381 return origChildQuery.equals(other.origChildQuery) &&
382 parentsFilter.equals(other.parentsFilter) &&
383 scoreMode == other.scoreMode;
390 public int hashCode() {
391 final int prime = 31;
393 hash = prime * hash + origChildQuery.hashCode();
394 hash = prime * hash + scoreMode.hashCode();
395 hash = prime * hash + parentsFilter.hashCode();
400 public Object clone() {
401 return new BlockJoinQuery((Query) origChildQuery.clone(),