1 package org.apache.lucene.facet.search;
3 import java.io.IOException;
4 import java.util.ArrayList;
7 import org.apache.lucene.analysis.MockAnalyzer;
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.Field;
10 import org.apache.lucene.document.Field.Index;
11 import org.apache.lucene.document.Field.Store;
12 import org.apache.lucene.document.Field.TermVector;
13 import org.apache.lucene.index.CorruptIndexException;
14 import org.apache.lucene.index.IndexReader;
15 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
16 import org.apache.lucene.index.RandomIndexWriter;
17 import org.apache.lucene.index.Term;
18 import org.apache.lucene.search.IndexSearcher;
19 import org.apache.lucene.search.Query;
20 import org.apache.lucene.search.TermQuery;
21 import org.apache.lucene.store.Directory;
22 import org.junit.Test;
24 import org.apache.lucene.util.LuceneTestCase;
25 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
26 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
27 import org.apache.lucene.facet.search.FacetsAccumulator;
28 import org.apache.lucene.facet.search.FloatArrayAllocator;
29 import org.apache.lucene.facet.search.IntArrayAllocator;
30 import org.apache.lucene.facet.search.ScoredDocIdCollector;
31 import org.apache.lucene.facet.search.StandardFacetsAccumulator;
32 import org.apache.lucene.facet.search.params.CountFacetRequest;
33 import org.apache.lucene.facet.search.params.FacetSearchParams;
34 import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
35 import org.apache.lucene.facet.search.results.FacetResult;
36 import org.apache.lucene.facet.search.results.FacetResultNode;
37 import org.apache.lucene.facet.taxonomy.CategoryPath;
38 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
39 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
40 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
41 import org.apache.lucene.facet.util.PartitionsUtils;
44 * Licensed to the Apache Software Foundation (ASF) under one or more
45 * contributor license agreements. See the NOTICE file distributed with
46 * this work for additional information regarding copyright ownership.
47 * The ASF licenses this file to You under the Apache License, Version 2.0
48 * (the "License"); you may not use this file except in compliance with
49 * the License. You may obtain a copy of the License at
51 * http://www.apache.org/licenses/LICENSE-2.0
53 * Unless required by applicable law or agreed to in writing, software
54 * distributed under the License is distributed on an "AS IS" BASIS,
55 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
56 * See the License for the specific language governing permissions and
57 * limitations under the License.
60 public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
62 //TODO (Facet): Move to extend BaseTestTopK and separate to several smaller test cases (methods) - see TestTopKResultsHandler
/**
 * End-to-end check of facet counting with {@code ResultMode.PER_NODE_IN_TREE}.
 * <p>
 * For every partition size in {@code partitionSizes} the test creates a fresh
 * index directory and taxonomy directory, adds 16 documents through
 * {@code prvt_add} (8 under a/b, 6 under a/c, one each for a/d and a/e), runs a
 * term query for content:alpha, accumulates eight {@code CountFacetRequest}s and
 * asserts on the values, residues and sub-result counts of the returned
 * {@code FacetResult}s.
 * <p>
 * NOTE(review): several statements this method relies on (the declaration of
 * the index variable {@code i}, the body of {@code fixedPartitionSize()}, any
 * depth configuration of the requests, and resource cleanup) are not visible
 * in this view of the file; confirm against the complete source.
 *
 * @throws Exception declared so that any failure from indexing, searching or
 *         accumulation propagates out of the test
 */
65 public void testSimple() throws Exception {
// Partition sizes to exercise; the whole scenario below is repeated for each one.
67 int[] partitionSizes = new int[] {
68 2,3,4, 5, 6, 7, 10, 1000,
71 for (int partitionSize : partitionSizes) {
// Separate directories for the search index (iDir) and the taxonomy (tDir).
72 Directory iDir = newDirectory();
73 Directory tDir = newDirectory();
76 System.out.println("Partition Size: " + partitionSize);
// Final copy of the loop variable, declared right before the anonymous subclass
// below; presumably returned from fixedPartitionSize() (body not visible here).
79 final int pSize = partitionSize;
80 DefaultFacetIndexingParams iParams = new DefaultFacetIndexingParams() {
82 protected int fixedPartitionSize() {
87 RandomIndexWriter iw = new RandomIndexWriter(random, iDir,
88 newIndexWriterConfig(TEST_VERSION_CURRENT,
89 new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
90 TaxonomyWriter tw = new LuceneTaxonomyWriter(tDir);
// 16 calls in total: 8 paths under a/b, 6 under a/c, 1 for a/d and 1 for a/e.
// The assertions further down expect a=16, a/b=8, a/c=6, a/b/2=3, a/b/1=2, a/c/1=1.
91 prvt_add(iParams, iw, tw, "a", "b");
92 prvt_add(iParams, iw, tw, "a", "b", "1");
93 prvt_add(iParams, iw, tw, "a", "b", "1");
94 prvt_add(iParams, iw, tw, "a", "b", "2");
95 prvt_add(iParams, iw, tw, "a", "b", "2");
96 prvt_add(iParams, iw, tw, "a", "b", "2");
97 prvt_add(iParams, iw, tw, "a", "b", "3");
98 prvt_add(iParams, iw, tw, "a", "b", "4");
99 prvt_add(iParams, iw, tw, "a", "c");
100 prvt_add(iParams, iw, tw, "a", "c");
101 prvt_add(iParams, iw, tw, "a", "c");
102 prvt_add(iParams, iw, tw, "a", "c");
103 prvt_add(iParams, iw, tw, "a", "c");
104 prvt_add(iParams, iw, tw, "a", "c", "1");
105 prvt_add(iParams, iw, tw, "a", "d");
106 prvt_add(iParams, iw, tw, "a", "e");
// NOTE(review): no close()/commit() calls for iw, tw, ir, is, tr, iDir or tDir are
// visible anywhere in this method as shown; verify they exist in the full source.
108 IndexReader ir = iw.getReader();
113 IndexSearcher is = newSearcher(ir);
114 LuceneTaxonomyReader tr = new LuceneTaxonomyReader(tDir);
116 // Get all of the documents and run the query, then do different
117 // facet counts and compare to control
// Every document built by prvt_add carries content="alpha", so this query targets all of them.
118 Query q = new TermQuery(new Term("content", "alpha"));
119 ScoredDocIdCollector scoredDoc = ScoredDocIdCollector.create(is.maxDoc(), true);
121 // Collector collector = new MultiCollector(scoredDoc);
122 is.search(q, scoredDoc);
// Eight count requests, all created with 2 as the second constructor argument (the
// "K=2" of the comments below) and all switched to PER_NODE_IN_TREE mode.
// Naming appears to encode root/K/depth (e.g. cfra23 = root "a", K=2, depth 3).
// NOTE(review): the result comments below mention depths 3/2/1/0, but no depth is
// configured in the lines visible here - confirm in the full source.
124 CountFacetRequest cfra23 = new CountFacetRequest(
125 new CategoryPath("a"), 2);
127 cfra23.setResultMode(ResultMode.PER_NODE_IN_TREE);
129 CountFacetRequest cfra22 = new CountFacetRequest(
130 new CategoryPath("a"), 2);
132 cfra22.setResultMode(ResultMode.PER_NODE_IN_TREE);
134 CountFacetRequest cfra21 = new CountFacetRequest(
135 new CategoryPath("a"), 2);
137 cfra21.setResultMode(ResultMode.PER_NODE_IN_TREE);
139 CountFacetRequest cfrb22 = new CountFacetRequest(
140 new CategoryPath("a", "b"), 2);
142 cfrb22.setResultMode(ResultMode.PER_NODE_IN_TREE);
144 CountFacetRequest cfrb23 = new CountFacetRequest(
145 new CategoryPath("a", "b"), 2);
147 cfrb23.setResultMode(ResultMode.PER_NODE_IN_TREE);
149 CountFacetRequest cfrb21 = new CountFacetRequest(
150 new CategoryPath("a", "b"), 2);
152 cfrb21.setResultMode(ResultMode.PER_NODE_IN_TREE);
// "Doctor" is never indexed above; the final assertions expect no result for it.
154 CountFacetRequest doctor = new CountFacetRequest(
155 new CategoryPath("Doctor"), 2);
157 doctor.setResultMode(ResultMode.PER_NODE_IN_TREE);
159 CountFacetRequest cfrb20 = new CountFacetRequest(
160 new CategoryPath("a", "b"), 2);
162 cfrb20.setResultMode(ResultMode.PER_NODE_IN_TREE);
// Registration order matters: results are read back by position (get(0) .. get(6)).
164 FacetSearchParams facetSearchParams = new FacetSearchParams(iParams);
165 facetSearchParams.addFacetRequest(cfra23);
166 facetSearchParams.addFacetRequest(cfra22);
167 facetSearchParams.addFacetRequest(cfra21);
168 facetSearchParams.addFacetRequest(cfrb23);
169 facetSearchParams.addFacetRequest(cfrb22);
170 facetSearchParams.addFacetRequest(cfrb21);
171 facetSearchParams.addFacetRequest(doctor);
172 facetSearchParams.addFacetRequest(cfrb20);
// Both allocators are sized from PartitionsUtils.partitionSize(...) for these params.
174 IntArrayAllocator iaa = new IntArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1);
175 FloatArrayAllocator faa = new FloatArrayAllocator(PartitionsUtils.partitionSize(facetSearchParams,tr), 1);
176 FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, iaa, faa);
177 fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
// Wall-clock timing of the accumulation; only printed, never asserted on.
178 long start = System.currentTimeMillis();
180 List<FacetResult> facetResults = fctExtrctr.accumulate(scoredDoc.getScoredDocIDs());
182 long end = System.currentTimeMillis();
184 System.out.println("Time: " + (end - start));
// hasDoctor accumulates (via |=) whether any returned result belongs to the "Doctor" request.
// Double.MIN_VALUE is the smallest positive double, so using it as the delta makes
// the floating-point assertions below effectively exact comparisons.
187 FacetResult fr = facetResults.get(0); // a, depth=3, K=2
188 boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
189 assertEquals(9, fr.getNumValidDescendants());
190 FacetResultNode parentRes = fr.getFacetResultNode();
191 assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
192 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
193 assertEquals(2, parentRes.getNumSubResults());
194 // two nodes sorted by descending values: a/b with 8 and a/c with 6
195 // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
196 // a/c has residue 0, and one child a/c/1 with value 1.
// Flattened in traversal order: for each child of the root its (value, residue),
// immediately followed by (value, residue) of each of that child's sub-results.
197 double [] expectedValues0 = { 8.0, 2.0, 3.0, 0.0, 2.0, 0.0, 6.0, 0.0, 1.0, 0.0 };
// NOTE(review): 'i' indexes the expected-values arrays in every loop below, but its
// declaration/reset is not visible in this view.
199 for (FacetResultNode node : parentRes.getSubResults()) {
200 assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
201 assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
202 for (FacetResultNode node2 : node.getSubResults()) {
203 assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
204 assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
208 // now just change the value of the first child of the root to 5, and then rearrange
209 // expected are: first a/c of value 6 and residue 0, and one child a/c/1 with value 1
210 // then a/b with value 5 and residue 2, and both children: a/b/2 with value 3, and a/b/1 with value 2.
// NOTE(review): the body of this loop (the actual mutation) is not visible here.
211 for (FacetResultNode node : parentRes.getSubResults()) {
216 double [] expectedValues00 = { 6.0, 0.0, 1.0, 0.0, 5.0, 2.0, 3.0, 0.0, 2.0, 0.0 };
// NOTE(review): 'fr' is reassigned here, yet the loop below still walks 'parentRes',
// which was obtained from the previous 'fr'. That only verifies the rearrangement if
// rearrangeFacetResult reorders the existing node in place - confirm.
217 fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr);
219 for (FacetResultNode node : parentRes.getSubResults()) {
220 assertEquals(expectedValues00[i++], node.getValue(), Double.MIN_VALUE);
221 assertEquals(expectedValues00[i++], node.getResidue(), Double.MIN_VALUE);
222 for (FacetResultNode node2 : node.getSubResults()) {
223 assertEquals(expectedValues00[i++], node2.getValue(), Double.MIN_VALUE);
224 assertEquals(expectedValues00[i++], node2.getResidue(), Double.MIN_VALUE);
228 fr = facetResults.get(1); // a, depth=2, K=2. same result as before
229 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
230 assertEquals(9, fr.getNumValidDescendants());
231 parentRes = fr.getFacetResultNode();
232 assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
233 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
234 assertEquals(2, parentRes.getNumSubResults());
235 // two nodes sorted by descending values: a/b with 8 and a/c with 6
236 // a/b has residue 2 and two children a/b/2 with value 3, and a/b/1 with value 2.
237 // a/c has residue 0, and one child a/c/1 with value 1.
239 for (FacetResultNode node : parentRes.getSubResults()) {
240 assertEquals(expectedValues0[i++], node.getValue(), Double.MIN_VALUE);
241 assertEquals(expectedValues0[i++], node.getResidue(), Double.MIN_VALUE);
242 for (FacetResultNode node2 : node.getSubResults()) {
243 assertEquals(expectedValues0[i++], node2.getValue(), Double.MIN_VALUE);
244 assertEquals(expectedValues0[i++], node2.getResidue(), Double.MIN_VALUE);
248 fr = facetResults.get(2); // a, depth=1, K=2
249 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
// NOTE(review): unlike the sibling checks, this call passes a third numeric argument.
// It presumably binds to the (expected, actual, delta) overload, so with delta=4 any
// count from 0 to 8 would pass. The intended check is probably
// assertEquals(4, fr.getNumValidDescendants()) - confirm and tighten.
250 assertEquals(4, fr.getNumValidDescendants(), 4);
251 parentRes = fr.getFacetResultNode();
252 assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
253 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
254 assertEquals(2, parentRes.getNumSubResults());
255 // two nodes sorted by descending values:
256 // a/b with value 8 and residue 0 (because no children considered),
257 // and a/c with value 6 and residue 0 (because no children considered)
258 double [] expectedValues2 = { 8.0, 0.0, 6.0, 0.0 };
260 for (FacetResultNode node : parentRes.getSubResults()) {
261 assertEquals(expectedValues2[i++], node.getValue(), Double.MIN_VALUE);
262 assertEquals(expectedValues2[i++], node.getResidue(), Double.MIN_VALUE);
// NOTE(review): arguments are in (actual, expected) order here, the reverse of every
// other assertion in this method; the outcome is the same but a failure message
// would label the two values the wrong way round.
263 assertEquals(node.getNumSubResults(), 0);
266 fr = facetResults.get(3); // a/b, depth=3, K=2
267 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
268 assertEquals(4, fr.getNumValidDescendants());
269 parentRes = fr.getFacetResultNode();
270 assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
271 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
272 assertEquals(2, parentRes.getNumSubResults());
// Only the values of the two reported children of a/b are listed; residue and
// sub-result count are asserted to be 0 for each of them in the loops below.
273 double [] expectedValues3 = { 3.0, 2.0 };
275 for (FacetResultNode node : parentRes.getSubResults()) {
276 assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
277 assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
278 assertEquals(0, node.getNumSubResults());
281 fr = facetResults.get(4); // a/b, depth=2, K=2
282 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
283 assertEquals(4, fr.getNumValidDescendants());
284 parentRes = fr.getFacetResultNode();
285 assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
286 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
287 assertEquals(2, parentRes.getNumSubResults());
289 for (FacetResultNode node : parentRes.getSubResults()) {
290 assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
291 assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
292 assertEquals(0, node.getNumSubResults());
295 fr = facetResults.get(5); // a/b, depth=1, K=2
296 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
297 assertEquals(4, fr.getNumValidDescendants());
298 parentRes = fr.getFacetResultNode();
299 assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
300 assertEquals(2.0, parentRes.getResidue(), Double.MIN_VALUE);
301 assertEquals(2, parentRes.getNumSubResults());
303 for (FacetResultNode node : parentRes.getSubResults()) {
304 assertEquals(expectedValues3[i++], node.getValue(), Double.MIN_VALUE);
305 assertEquals(0.0, node.getResidue(), Double.MIN_VALUE);
306 assertEquals(0, node.getNumSubResults());
// Index 6 is expected to hold the last registered request (cfrb20): the "Doctor"
// request registered before it is expected to yield no result at all.
309 fr = facetResults.get(6); // a/b, depth=0, K=2
310 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
311 assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
312 parentRes = fr.getFacetResultNode();
313 assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
314 assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
315 assertEquals(0, parentRes.getNumSubResults());
// NOTE(review): repeats the hasDoctor update made a few lines above on the same 'fr'; redundant.
316 hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
318 // doctor, depth=1, K=2
// Eight requests were registered but only seven results are expected, and none of
// them may belong to the "Doctor" request.
319 assertFalse("Shouldn't have found anything for a FacetRequest " +
320 "of a facet that doesn't exist in the index.", hasDoctor);
321 assertEquals("Shouldn't have found more than seven request.", 7, facetResults.size());
/**
 * Test helper that builds one document for a single category path.
 * <p>
 * The visible part creates a {@code CategoryPath} from {@code strings}, lets a
 * {@code CategoryDocumentBuilder} (configured with {@code tw} and
 * {@code iParams}) populate a new {@code Document} from the {@code cps} list,
 * and adds a "content" field with the value "alpha" - the term that
 * {@code testSimple} queries for.
 * <p>
 * NOTE(review): the end of this method is not visible in this view; presumably
 * the document is then added to {@code iw} - confirm in the full source.
 *
 * @param iParams indexing params handed to the {@code CategoryDocumentBuilder}
 * @param iw      index writer; not used in the visible part of the method
 * @param tw      taxonomy writer handed to the {@code CategoryDocumentBuilder}
 * @param strings components of the category path, e.g. "a", "b", "1"
 * @throws IOException declared by the signature
 * @throws CorruptIndexException declared by the signature
 */
330 private void prvt_add(DefaultFacetIndexingParams iParams, RandomIndexWriter iw,
331 TaxonomyWriter tw, String... strings) throws IOException,
332 CorruptIndexException {
333 ArrayList<CategoryPath> cps = new ArrayList<CategoryPath>();
334 CategoryPath cp = new CategoryPath(strings);
// NOTE(review): no visible line adds 'cp' to 'cps' (one original line is missing
// between here and the Document creation); presumably cps.add(cp) - confirm.
336 Document d = new Document();
337 new CategoryDocumentBuilder(tw, iParams).setCategoryPaths(cps).build(d);
// Stored, analyzed field without term vectors; every test document gets the same value.
338 d.add(new Field("content", "alpha", Store.YES, Index.ANALYZED, TermVector.NO));