1 package org.apache.lucene.facet.search;
3 import java.io.IOException;
4 import java.util.ArrayList;
7 import org.apache.lucene.util.PriorityQueue;
9 import org.apache.lucene.facet.search.params.FacetRequest;
10 import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
11 import org.apache.lucene.facet.search.results.FacetResult;
12 import org.apache.lucene.facet.search.results.FacetResultNode;
13 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
14 import org.apache.lucene.facet.search.results.IntermediateFacetResult;
15 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
16 import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
17 import org.apache.lucene.util.collections.IntIterator;
18 import org.apache.lucene.util.collections.IntToObjectMap;
21 * Licensed to the Apache Software Foundation (ASF) under one or more
22 * contributor license agreements. See the NOTICE file distributed with
23 * this work for additional information regarding copyright ownership.
24 * The ASF licenses this file to You under the Apache License, Version 2.0
25 * (the "License"); you may not use this file except in compliance with
26 * the License. You may obtain a copy of the License at
28 * http://www.apache.org/licenses/LICENSE-2.0
30 * Unless required by applicable law or agreed to in writing, software
31 * distributed under the License is distributed on an "AS IS" BASIS,
32 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33 * See the License for the specific language governing permissions and
34 * limitations under the License.
38 * Generates {@link FacetResult} from the count arrays aggregated for a particular
39 * {@link FacetRequest}.
40 * The generated {@link FacetResult} is a subtree of the taxonomy tree.
41 * Its root node, {@link FacetResult#getFacetResultNode()},
42 * is the facet specified by {@link FacetRequest#getCategoryPath()},
43 * and the enumerated children, {@link FacetResultNode#getSubResults()}, of each node in that
44 * {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) among its children
46 * Top in the sense {@link FacetRequest#getSortBy()},
47 * which can be by the values aggregated in the count arrays, or by ordinal numbers;
48 * also specified is the sort order, {@link FacetRequest#getSortOrder()},
49 * ascending or descending, of these values or ordinals before their top K are selected.
50 * The depth (number of levels excluding the root) of the
51 * {@link FacetResult} tree is specified by {@link FacetRequest#getDepth()}.
53 * Because the number of selected children of each node is restricted,
54 * and not the overall number of nodes in the {@link FacetResult}, facets not selected
55 * into {@link FacetResult} might have better values, or ordinals, (typically,
56 * higher counts), than facets that are selected into the {@link FacetResult}.
58 * The generated {@link FacetResult} also provides with
59 * {@link FacetResult#getNumValidDescendants()}, which returns the total number of facets
60 * that are descendants of the root node, no deeper than {@link FacetRequest#getDepth()}, and
* which have valid value. The root node itself is not counted here.
62 * Valid value is determined by the {@link FacetResultsHandler}.
63 * {@link TopKInEachNodeHandler} defines valid as != 0.
65 * <b>NOTE:</b> this code relies on the assumption that {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller
66 * value than any valid ordinal.
68 * @lucene.experimental
public class TopKInEachNodeHandler extends FacetResultsHandler {

/**
 * Constructs a handler that, for each node of the result tree, keeps the
 * top K ( = {@link FacetRequest#getNumResults()}) children.
 *
 * @param taxonomyReader taxonomy used to resolve category paths to ordinals
 * @param facetRequest request defining the root category, depth, K and sort criteria
 */
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
FacetRequest facetRequest) {
super(taxonomyReader, facetRequest);
78 * Recursively explore all facets that can be potentially included in the
79 * {@link FacetResult} to be generated, and that belong to the given
80 * partition, so that values can be examined and collected. For each such
81 * node, gather its top K ({@link FacetRequest#getNumResults()}) children
82 * among its children that are encountered in the given particular partition
83 * (aka current counting list).
85 * @return {@link IntermediateFacetResult} consisting of
86 * {@link IntToObjectMap} that maps potential
87 * {@link FacetResult} nodes to their top K children encountered in
88 * the current partition. Note that the mapped potential tree nodes
89 * need not belong to the given partition, only the top K children
90 * mapped to. The aim is to identify nodes that are certainly excluded
91 * from the {@link FacetResult} to be eventually (after going through
92 * all the partitions) returned by this handler, because they have K
93 * better siblings, already identified in this partition. For the
94 * identified excluded nodes, we only count number of their
95 * descendants in the subtree (to be included in
96 * {@link FacetResult#getNumValidDescendants()}), but not bother with
97 * selecting top K in these generations, which, by definition, are,
98 * too, excluded from the FacetResult tree.
99 * @param arrays the already filled in count array, potentially only covering
100 * one partition: the ordinals ranging from
101 * @param offset to <code>offset</code> + the length of the count arrays
102 * within <code>arrays</code> (exclusive)
103 * @throws IOException in case
104 * {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)}
106 * @see FacetResultsHandler#fetchPartitionResult(FacetArrays, int)
public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException {

// get the root of the result tree to be returned, and the depth of that result tree
// (depth means number of node levels excluding the root).
int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
if (rootNode == TaxonomyReader.INVALID_ORDINAL) {

// K: number of best results to keep per node; cannot usefully exceed taxonomy size
int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node

// this will grow into the returned IntermediateFacetResult
IntToObjectMap<AACO> AACOsOfOnePartition = new IntToObjectMap<AACO>();

int partitionSize = arrays.getArraysLength(); // all partitions, except, possibly, the last,
// have the same length. Hence modulo is OK.

int depth = facetRequest.getDepth();

// Need to only have root node.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);

// clamp depth: the stacks below are sized depth+2, so keep it well below overflow
if (depth > Short.MAX_VALUE - 3) {
depth = Short.MAX_VALUE -3;

int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
int[] youngestChild = childrenArray.getYoungestChildArray();
int[] olderSibling = childrenArray.getOlderSiblingArray();
int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0,
// in the tree. These include those selected as top K in each node, and all the others that
// were not. Not including rootNode

// the following priority queue will be used again and again for each node recursed into
// to select its best K children among its children encountered in the given partition
PriorityQueue<AggregatedCategory> pq =
new AggregatedCategoryHeap(K, this.getSuitableACComparator());

// reusables will feed the priority queue in each use
AggregatedCategory [] reusables = new AggregatedCategory[2+K];
for (int i = 0; i < reusables.length; i++) {
reusables[i] = new AggregatedCategory(1,0);

/*
* The returned map is built by a recursive visit of potential tree nodes. Nodes
* determined to be excluded from the FacetResult are not recursively explored as others,
* they are only recursed in order to count the number of their descendants.
* Also, nodes that they and any of their descendants can not be mapped into facets encountered
* in this partition, are, too, explored no further. These are facets whose ordinal
* numbers are greater than the ordinals of the given partition. (recall that the Taxonomy
* maintains that a parent ordinal is smaller than any of its descendants' ordinals).
* So, when scanning over all children of a potential tree node n: (1) all children with ordinal number
* greater than those in the given partition are skipped over, (2) among the children of n residing
* in this partition, the best K children are selected (using pq) for usual further recursion
* and the rest (those rejected out from the pq) are only recursed for counting total number
* of descendants, and (3) all the children of ordinal numbers smaller than the given partition
* are further explored in the usual way, since these may lead to descendants residing in this partition.
*
* ordinalStack drives the recursive descent.
* Top of stack holds the current node which we recurse from.
* ordinalStack[0] holds the root of the facetRequest, and
* it is always maintained that parent(ordinalStack[i]) = ordinalStack[i-1].
* localDepth points to the current top of ordinalStack.
* Only top of ordinalStack can be TaxonomyReader.INVALID_ORDINAL, and this if and only if
* the element below it explored all its relevant children.
*/
int[] ordinalStack = new int[depth+2]; // for 0 and for invalid on top
ordinalStack[0] = rootNode;

/*
* bestSignlingsStack[i] maintains the best K children of ordinalStack[i-1], namely,
* the best K siblings of ordinalStack[i], best K among those residing in the given partition.
* Note that the residents of ordinalStack need not belong
* to the current partition, only the residents of bestSignlingsStack.
* When exploring the children of ordinalStack[i-1] that reside in the current partition
* (after the top K of them have been determined and stored into bestSignlingsStack[i]),
* siblingExplored[i] points into bestSignlingsStack[i], to the child now explored, hence
* residing in ordinalStack[i], and firstToTheLeftOfPartition[i] holds the largest ordinal of
* a sibling smaller than the ordinals in the partition.
* When siblingExplored[i] == max int, the top K siblings of ordinalStack[i] among those siblings
* that reside in this partition have not been determined yet.
* if siblingExplored[i] < 0, the node in ordinalStack[i] is to the left of partition
* (i.e. of a smaller ordinal than the current partition)
* (step (3) above is executed for the children of ordinalStack[i-1])
*/
int[][] bestSignlingsStack = new int[depth+2][];
int[] siblingExplored = new int[depth+2];
int[] firstToTheLeftOfPartition = new int [depth+2];

int tosOrdinal; // top of stack element, the ordinal at the top of stack

/*
* to start the loop, complete the data structures for root node:
* push its youngest child to ordinalStack; make a note in siblingExplored[] that the children
* of rootNode, which reside in the current partition have not been read yet to select the top
* K of them. Also, make rootNode as if, related to its parent, rootNode belongs to the children
* of ordinal numbers smaller than those of the current partition (this will ease on end condition --
* we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that
*/
ordinalStack[++localDepth] = youngestChild[rootNode];
siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition
siblingExplored[0] = -1; // as if rootNode resides to the left of current position

/*
* now the whole recursion: loop as long as stack is not empty of elements descendants of
* facetRequest's root.
*/
while (localDepth > 0) {
tosOrdinal = ordinalStack[localDepth];
if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
// the brotherhood that has been occupying the top of stack is all exhausted.
// Hence, element below tos, namely, father of tos, has all its children,
// and itself, all explored.

// replace this father, now on top of stack, by this father's sibling:
// this parent's ordinal can not be greater than current partition, as otherwise
// its child, now just removed, would not have been pushed on it.
// so the father is either inside the partition, or smaller ordinal
if (siblingExplored[localDepth] < 0 ) {
ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];

// in this point, siblingExplored[localDepth] between 0 and number of bestSiblings
// it can not be max int
siblingExplored[localDepth]--;
if (siblingExplored[localDepth] == -1 ) {
//siblings residing in the partition have been all processed, we now move
// to those of ordinal numbers smaller than the partition
ordinalStack[localDepth] = firstToTheLeftOfPartition[localDepth];

// still explore siblings residing in the partition
// just move to the next one
ordinalStack[localDepth] = bestSignlingsStack[localDepth][siblingExplored[localDepth]];

} // end of: tosOrdinal is invalid, and hence removed, and its parent was replaced by this

// now try to push a kid, but first look at tos whether it 'deserves' its kids explored:
// it is not to the right of current partition, and we know whether to only count or to
// select best K siblings.
if (siblingExplored[localDepth] == Integer.MAX_VALUE) {
//tosOrdinal was not examined yet for its position relative to current partition
// and the best K of current partition, among its siblings, have not been determined yet
while (tosOrdinal >= endOffset) {
tosOrdinal = olderSibling[tosOrdinal];

// now it is inside. Run it and all its siblings inside the partition through a heap
// and in doing so, count them, find best K, and sum into residue
double residue = 0f; // the sum of all the siblings from this partition that do not make

//reusables are consumed as from a stack. The stack starts full and returns full.
int tosReuslables = reusables.length -1;

while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact
// that TaxonomyReader.INVALID_ORDINAL == -1 < offset
double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize);
if (value != 0) { // the value of yc is not 0, it is to be considered.
totalNumOfDescendantsConsidered++;

// consume one reusable, and push to the priority queue
AggregatedCategory ac = reusables[tosReuslables--];
ac.ordinal = tosOrdinal;

ac = pq.insertWithOverflow(ac);

// TODO (Facet): could it be that we need to do something
// else, not add, depending on the aggregator?

/* when a facet is excluded from top K, because already in this partition it has
* K better siblings, it is only recursed for count only.
*/
// update totalNumOfDescendants by the now excluded node and all its descendants
totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
// and now return it and all its descendants. These will never make it to FacetResult
totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild,
olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
reusables[++tosReuslables] = ac;

tosOrdinal = olderSibling[tosOrdinal];

// now pq has best K children of ordinals that belong to the given partition.
// Populate a new AACO with them.
// tosOrdinal is now first sibling smaller than partition, make a note of that
firstToTheLeftOfPartition[localDepth] = tosOrdinal;
int aaci = pq.size();
int[] ords = new int[aaci];
double [] vals = new double [aaci];
// drain the heap from worst to best, filling ords/vals back to front so the
// arrays end up sorted from best to least
AggregatedCategory ac = pq.pop();
ords[--aaci] = ac.ordinal;
vals[aaci] = ac.value;
reusables[++tosReuslables] = ac;

// if more than 0 ordinals, add this AACO to the map to be returned,
// and add ords to sibling stack, and make a note in siblingExplored that these are to
if (ords.length > 0) {
AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
bestSignlingsStack[localDepth] = ords;
siblingExplored[localDepth] = ords.length-1;
ordinalStack[localDepth] = ords[ords.length-1];

// no ordinals siblings of tosOrdinal in current partition, move to the left of it
// tosOrdinal is already there (to the left of partition).
// make a note of it in siblingExplored
ordinalStack[localDepth] = tosOrdinal;
siblingExplored[localDepth] = -1;

} // end of: we did not check the position of a valid ordinal wrt partition

// now tosOrdinal is a valid ordinal, inside partition or to the left of it, we need
// to push its kids on top of it, if not too deep.
// Make a note that we did not check them yet
if (localDepth >= depth) {
// localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;

ordinalStack[++localDepth] = youngestChild[tosOrdinal];
siblingExplored[localDepth] = Integer.MAX_VALUE;
} // end of loop while stack is not empty

// now generate a TempFacetResult from AACOsOfOnePartition, and consider self.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);

tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered;
363 * Recursively count <code>ordinal</code>, whose depth is <code>currentDepth</code>,
364 * and all its descendants down to <code>maxDepth</code> (including),
365 * descendants whose value in the count arrays, <code>arrays</code>, is != 0.
366 * The count arrays only includes the current partition, from <code>offset</code>, to (exclusive)
367 * <code>endOffset</code>.
368 * It is assumed that <code>ordinal</code> < <code>endOffset</code>,
369 * otherwise, not <code>ordinal</code>, and none of its descendants, reside in
370 * the current partition. <code>ordinal</code> < <code>offset</code> is allowed,
* as ordinal's descendants might be >= <code>offset</code>.
373 * @param ordinal a facet ordinal.
374 * @param youngestChild mapping a given ordinal to its youngest child in the taxonomy (of largest ordinal number),
375 * or to -1 if has no children.
376 * @param olderSibling mapping a given ordinal to its older sibling, or to -1
377 * @param arrays values for the ordinals in the given partition
378 * @param offset the first (smallest) ordinal in the given partition
379 * @param partitionSize number of ordinals in the given partition
380 * @param endOffset one larger than the largest ordinal that belong to this partition
* @param currentDepth the depth of ordinal in the TaxonomyTree (relative to the root node of the facetRequest)
382 * @param maxDepth maximal depth of descendants to be considered here (measured relative to rootnode of the
385 * @return the number of nodes, from ordinal down its descendants, of depth <= maxDepth,
386 * which reside in the current partition, and whose value != 0
private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling,
FacetArrays arrays, int partitionSize, int offset,
int endOffset, int currentDepth, int maxDepth) {
if (offset <= ordinal) {
// ordinal belongs to the current partition
if (0 != facetRequest.getValueOf(arrays, ordinal % partitionSize)) {

// now consider children of ordinal, if not too deep
if (currentDepth >= maxDepth) {

int yc = youngestChild[ordinal];
// skip children whose ordinals are to the right of (>=) this partition's end;
// they cannot contribute to the current partition's count
while (yc >= endOffset) {
yc = olderSibling[yc];
while (yc > TaxonomyReader.INVALID_ORDINAL) { // assuming this is -1, smaller than any legal ordinal
// recurse one level deeper for each remaining child, accumulating counts
ret += countOnly (yc, youngestChild, olderSibling, arrays,
partitionSize, offset, endOffset, currentDepth+1, maxDepth);
yc = olderSibling[yc];
416 * Merge several partitions' {@link IntermediateFacetResult}-s into one of the
419 * @see FacetResultsHandler#mergeResults(IntermediateFacetResult...)
public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults)
throws ClassCastException, IllegalArgumentException {

if (tmpResults.length == 0) {

// skip over null tmpResults
for (; (i < tmpResults.length)&&(tmpResults[i] == null); i++) {}
if (i == tmpResults.length) {
// all inputs are null

// i points to the first non-null input
int K = this.facetRequest.getNumResults(); // number of best result in each node
// the first non-null input is merged into (and destructively reused as) the result
IntermediateFacetResultWithHash tmpToReturn = (IntermediateFacetResultWithHash)tmpResults[i++];

// now loop over the rest of tmpResults and merge each into tmpToReturn
for ( ; i < tmpResults.length; i++) {
IntermediateFacetResultWithHash tfr = (IntermediateFacetResultWithHash)tmpResults[i];
tmpToReturn.totalNumOfFacetsConsidered += tfr.totalNumOfFacetsConsidered;
if (tfr.isRootNodeIncluded) {
tmpToReturn.isRootNodeIncluded = true;
tmpToReturn.rootNodeValue = tfr.rootNodeValue;

// now merge the HashMap of tfr into this of tmpToReturn
IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
//iterate over all ordinals in tfr that are maps to their children (and the residue over
// non included children)
while (tfrIntIterator.hasNext()) {
int tfrkey = tfrIntIterator.next();
AACO tmpToReturnAACO = null;
if (null == (tmpToReturnAACO = tmpToReturnMapToACCOs.get(tfrkey))) {
// if tmpToReturn does not have any kids of tfrkey, map all the kids
// from tfr to it as one package, along with their residue
tmpToReturnMapToACCOs.put(tfrkey, tfrMapToACCOs.get(tfrkey));

// merge the best K children of tfrkey as appear in tmpToReturn and in tfr
AACO tfrAACO = tfrMapToACCOs.get(tfrkey);
int resLength = tfrAACO.ordinals.length + tmpToReturnAACO.ordinals.length;

int[] resOrds = new int [resLength];
double[] resVals = new double [resLength];
double resResidue = tmpToReturnAACO.residue + tfrAACO.residue;
int indexIntoTmpToReturn = 0;
int indexIntoTFR = 0;
ACComparator merger = getSuitableACComparator(); // by facet Request
// classic two-way merge of the two sorted child lists
for (int indexIntoRes = 0; indexIntoRes < resLength; indexIntoRes++) {
if (indexIntoTmpToReturn >= tmpToReturnAACO.ordinals.length) {
//tmpToReturnAACO (former result to return) ran out of indices
// it is all merged into resOrds and resVal
resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];

if (indexIntoTFR >= tfrAACO.ordinals.length) {
// tfr ran out of indices
resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
indexIntoTmpToReturn++;

// select which goes now to res: next (ord, value) from tmpToReturn or from tfr:
if (merger.leftGoesNow( tmpToReturnAACO.ordinals[indexIntoTmpToReturn],
tmpToReturnAACO.values[indexIntoTmpToReturn],
tfrAACO.ordinals[indexIntoTFR],
tfrAACO.values[indexIntoTFR])) {
resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
indexIntoTmpToReturn++;

resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];

} // end of merge of best kids of tfrkey that appear in tmpToReturn and its kids that appear in tfr
// altogether yielding no more than best K kids for tfrkey, not to appear in the new shape of

// leftovers that did not fit among the best K are folded into the residue
while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) {
resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++];

while (indexIntoTFR < tfrAACO.ordinals.length) {
resResidue += tfrAACO.values[indexIntoTFR++];

//update the list of best kids of tfrkey as appear in tmpToReturn
tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue));
} // end of: need to merge both AACO -- children and residue for same ordinal

} // end of loop over all ordinals in tfr
} // end of loop over all temporary facet results to merge
/**
 * Heap of {@link AggregatedCategory}, ordered by the given {@link ACComparator}.
 */
private static class AggregatedCategoryHeap extends PriorityQueue<AggregatedCategory> {

private ACComparator merger;
public AggregatedCategoryHeap(int size, ACComparator merger) {
this.merger = merger;

// NOTE: arguments are deliberately swapped -- an element that the comparator
// would emit later compares as "less than", i.e. sits nearer the heap top.
protected boolean lessThan(AggregatedCategory arg1, AggregatedCategory arg2) {
return merger.leftGoesNow(arg2.ordinal, arg2.value, arg1.ordinal, arg1.value);
/**
 * Heap of {@link FacetResultNode}, ordered by the given {@link ACComparator}.
 */
private static class ResultNodeHeap extends PriorityQueue<FacetResultNode> {
private ACComparator merger;
public ResultNodeHeap(int size, ACComparator merger) {
this.merger = merger;

// NOTE: arguments are deliberately swapped -- an element the comparator
// would emit later compares as "less than" (same convention as AggregatedCategoryHeap).
protected boolean lessThan(FacetResultNode arg1, FacetResultNode arg2) {
return merger.leftGoesNow(arg2.getOrdinal(), arg2.getValue(), arg1.getOrdinal(), arg1.getValue());
554 * @return the {@link ACComparator} that reflects the order,
555 * expressed in the {@link FacetRequest}, of
556 * facets in the {@link FacetResult}.
private ACComparator getSuitableACComparator() {
if (facetRequest.getSortOrder() == SortOrder.ASCENDING) {
// choose by sort key within ascending order
// (case labels not visible here; presumably VALUE vs ORDINAL -- TODO confirm)
switch (facetRequest.getSortBy()) {
return new AscValueACComparator();
return new AscOrdACComparator();
// descending order: same sort-key dispatch with descending comparators
switch (facetRequest.getSortBy()) {
return new DescValueACComparator();
return new DescOrdACComparator();
579 * A comparator of two Aggregated Categories according to the order
580 * (ascending / descending) and item (ordinal or value) specified in the
581 * FacetRequest for the FacetResult to be generated
private static abstract class ACComparator {
// Returns true iff (ord1, val1) should be emitted before (ord2, val2)
// in the order requested by the FacetRequest.
protected abstract boolean leftGoesNow (int ord1, double val1, int ord2, double val2);
/** Orders by aggregated value, ascending: the smaller value goes first. */
private static final class AscValueACComparator extends ACComparator {

AscValueACComparator() { }

protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
return (val1 < val2);
/** Orders by aggregated value, descending: the larger value goes first. */
private static final class DescValueACComparator extends ACComparator {

DescValueACComparator() { }

protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
return (val1 > val2);
/** Orders by ordinal number, ascending: the smaller ordinal goes first. */
private static final class AscOrdACComparator extends ACComparator {

AscOrdACComparator() { }

protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
return (ord1 < ord2);
/** Orders by ordinal number, descending: the larger ordinal goes first. */
private static final class DescOrdACComparator extends ACComparator {

DescOrdACComparator() { }

protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
return (ord1 > ord2);
630 * Intermediate result to hold counts from one or more partitions processed
631 * thus far. Its main field, constructor parameter <i>mapToAACOs</i>, is a map
632 * from ordinals to AACOs. The AACOs mapped to contain ordinals and values
633 * encountered in the count arrays of the partitions processed thus far. The
634 * ordinals mapped from are their parents, and they may be not contained in
635 * the partitions processed thus far. All nodes belong to the taxonomy subtree
636 * defined at the facet request, constructor parameter <i>facetReq</i>, by its
public static class IntermediateFacetResultWithHash implements IntermediateFacetResult {
protected IntToObjectMap<AACO> mapToAACOs;
FacetRequest facetRequest;
boolean isRootNodeIncluded; // among the ordinals in the partitions
// processed thus far
double rootNodeValue; // the value of it, in case encountered.
int totalNumOfFacetsConsidered; // total number of facets
// which belong to facetRequest subtree and have value != 0,
// and have been encountered thus far in the partitions processed.
// root node of result tree is not included in this count.

/**
 * @param facetReq the request this intermediate result serves
 * @param mapToAACOs map from a parent ordinal to the best K children (with residue)
 *        encountered for it so far
 */
public IntermediateFacetResultWithHash(FacetRequest facetReq,
IntToObjectMap<AACO> mapToAACOs) {
this.mapToAACOs = mapToAACOs;
this.facetRequest = facetReq;
this.isRootNodeIncluded = false;
this.rootNodeValue = 0.0;
this.totalNumOfFacetsConsidered = 0;

/** @return the {@link FacetRequest} this intermediate result was built for */
public FacetRequest getFacetRequest() {
return this.facetRequest;
} // end of FacetResultWithHash
665 * Maintains info of one entry in the filled up count array:
666 * an ordinal number of a category and the value aggregated for it
667 * (typically, that value is the count for that ordinal).
private static final class AggregatedCategory {
// ord: the category's ordinal; val: the value (typically the count) aggregated for it
AggregatedCategory(int ord, double val) {
679 * Maintains an array of {@link AggregatedCategory}. For space consideration, this is implemented as
680 * a pair of arrays, <i>ordinals</i> and <i>values</i>, rather than one array of pairs.
681 * Enumerated in <i>ordinals</i> are siblings,
682 * potential nodes of the {@link FacetResult} tree
683 * (i.e., the descendants of the root node, no deeper than the specified depth).
684 * No more than K ( = {@link FacetRequest#getNumResults()})
685 * siblings are enumerated, and
686 * <i>residue</i> holds the sum of values of the siblings rejected from the
private static final class AACO {
int [] ordinals; // ordinals of the best K children, sorted from best to least
double [] values; // the respective values for these children
double residue; // sum of values of all other children, that did not get into top K

/**
 * @param ords best-K child ordinals, best first
 * @param vals values matching <code>ords</code> position by position
 * @param r sum of values of the rejected (non top K) children
 */
AACO (int[] ords, double[] vals, double r) {
this.ordinals = ords;
702 * Recursively label the first facetRequest.getNumLabel() sub results
703 * of the root of a given {@link FacetResult}, or of an already labeled node in it.
704 * I.e., a node is labeled only if it is the root or all its ancestors are labeled.
public void labelResult(FacetResult facetResult) throws IOException {
if (facetResult == null) {
return; // any result to label?

// label the root and, recursively, up to numLabel children per labeled node
FacetResultNode rootNode = facetResult.getFacetResultNode();
recursivelyLabel(rootNode, facetRequest.getNumLabel());
private void recursivelyLabel(FacetResultNode node, int numToLabel) throws IOException {

node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
if (null == node.getSubResults()) {
return; // if node has no children -- done

// otherwise, label the first numToLabel of these children, and recursively -- their children.
for (FacetResultNode frn : node.getSubResults()) {
// go over the children of node from first to last, no more than numToLabel of them
recursivelyLabel(frn, numToLabel);
if (++numLabeled >= numToLabel) {
735 // verifies that the children of each node are sorted by the order
736 // specified by the facetRequest.
737 // the values in these nodes may have changed due to a re-count, for example
738 // following the accumulation by Sampling.
739 // so now we test and re-order if necessary.
/**
 * Re-sorts the children of every node in the given result by the order the
 * facetRequest specifies (values may have changed since the tree was built,
 * e.g. after a re-count).
 */
public FacetResult rearrangeFacetResult(FacetResult facetResult) {
PriorityQueue<FacetResultNode> nodesHeap =
new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator());
MutableFacetResultNode topFrn = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
rearrangeChilrenOfNode(topFrn, nodesHeap);
// Re-sorts node's direct children via the shared heap, then recurses into each child.
// The heap is reused across calls, hence the clear() at the start.
private void rearrangeChilrenOfNode(FacetResultNode node,
PriorityQueue<FacetResultNode> nodesHeap) {
nodesHeap.clear(); // just to be safe

for (FacetResultNode frn : node.getSubResults()) {

int size = nodesHeap.size();
ArrayList<FacetResultNode> subResults = new ArrayList<FacetResultNode>(size);
// pop yields the least desirable node first, so prepend to end up best-first
while (nodesHeap.size()>0) {
subResults.add(0,nodesHeap.pop());

((MutableFacetResultNode)node).setSubResults(subResults);
for (FacetResultNode frn : node.getSubResults()) {
rearrangeChilrenOfNode(frn, nodesHeap);
public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException {
IntermediateFacetResultWithHash tmp = (IntermediateFacetResultWithHash) tmpResult;
int ordinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
if ((tmp == null) || (ordinal == TaxonomyReader.INVALID_ORDINAL)) {

// NaN marks "root value not encountered in any partition"
double value = Double.NaN;
if (tmp.isRootNodeIncluded) {
value = tmp.rootNodeValue;

// build the result tree top-down from the accumulated parent -> best-K-children map
MutableFacetResultNode root = generateNode (ordinal, value, tmp.mapToAACOs);
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
// Recursively materializes a result node for `ordinal` with value `val`,
// attaching as sub-results the best-K children recorded for it in mapToAACOs.
private MutableFacetResultNode generateNode (int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
AACO aaco = mapToAACOs.get(ordinal);

List<FacetResultNode> list = new ArrayList<FacetResultNode>();
for (int i = 0; i < aaco.ordinals.length; i++) {
list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));

node.setSubResults(list);
node.setResidue(aaco.residue);