diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java b/lucene-java-3.5.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
new file mode 100644 (file)
index 0000000..3689d04
--- /dev/null
@@ -0,0 +1,267 @@
+package org.apache.lucene.facet.search.params;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
+import org.junit.Test;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.CategoryListIterator;
+import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.search.FacetResultsHandler;
+import org.apache.lucene.facet.search.FacetsAccumulator;
+import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.TopKFacetResultsHandler;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test faceted search with creation of multiple category list iterators by the
+ * same CLP, depending on the provided facet request
+ */
+public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
+
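+  /** Five sample documents, each carrying one author category and one date category. */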
+  CategoryPath[][] perDocCategories = new CategoryPath[][] {
+      { new CategoryPath("author", "Mark Twain"),
+          new CategoryPath("date", "2010") },
+      { new CategoryPath("author", "Robert Frost"),
+          new CategoryPath("date", "2009") },
+      { new CategoryPath("author", "Artur Miller"),
+          new CategoryPath("date", "2010") },
+      { new CategoryPath("author", "Edgar Allan Poe"),
+          new CategoryPath("date", "2009") },
+      { new CategoryPath("author", "Henry James"),
+          new CategoryPath("date", "2010") } };
+  
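+  /** When non-null, the test asserts that no counts were accumulated under this dimension. */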
+  String countForbiddenDimension;
+
+  @Test
+  public void testCLParamMultiIteratorsByRequest() throws Exception {
+    doTestCLParamMultiIteratorsByRequest(false);
+  }
+
+  @Test
+  public void testCLParamMultiIteratorsByRequestCacheCLI() throws Exception {
+    doTestCLParamMultiIteratorsByRequest(true);
+  }
+
+  private void doTestCLParamMultiIteratorsByRequest(boolean cacheCLI) throws Exception,
+      CorruptIndexException, IOException {
+    // Create a CLP which generates different CLIs according to the
+    // FacetRequest's dimension
+    CategoryListParams clp = new CategoryListParams();
+    FacetIndexingParams iParams = new DefaultFacetIndexingParams(clp);
+    Directory indexDir = newDirectory();
+    Directory taxoDir = newDirectory();
+    populateIndex(iParams, indexDir, taxoDir);
+
+    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
+    IndexReader reader = IndexReader.open(indexDir);
+
+    CategoryListCache clCache = null;
+    if (cacheCLI) {
+      // caching the iterator, so:
+      // 1: create the cached iterator, using original params
+      clCache = new CategoryListCache();
+      clCache.loadAndRegister(clp, reader, taxo, iParams);
+    }
+    
+    ScoredDocIDs allDocs = ScoredDocIdsUtils
+        .createAllDocsScoredDocIDs(reader);
+
+    // Search index with 'author' should filter ONLY ordinals whose parent
+    // is 'author'
+    countForbiddenDimension = "date";
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "author", 5, 5);
+
+    // Search index with 'date' should filter ONLY ordinals whose parent is
+    // 'date'
+    countForbiddenDimension = "author";
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "date", 5, 2);
+
+    // Search index with both 'date' and 'author'
+    countForbiddenDimension = null;
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, new String[] {
+            "author", "date" }, new int[] { 5, 5 }, new int[] { 5, 2 });
+    taxo.close();
+    reader.close();
+    indexDir.close();
+    taxoDir.close();
+  }
+
+  private void validateFacetedSearch(FacetIndexingParams iParams,
+      TaxonomyReader taxo, IndexReader reader, CategoryListCache clCache,
+      ScoredDocIDs allDocs, String dimension, int expectedValue, int expectedNumDescendants) throws IOException {
+    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs,
+        new String[] { dimension }, new int[] { expectedValue },
+        new int[] { expectedNumDescendants });
+  }
+
+  private void validateFacetedSearch(FacetIndexingParams iParams,
+      TaxonomyReader taxo, IndexReader reader,  CategoryListCache clCache, ScoredDocIDs allDocs,
+      String[] dimension, int[] expectedValue,
+      int[] expectedNumDescendants)
+      throws IOException {
+    FacetSearchParams sParams = new FacetSearchParams(iParams);
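+    // attach the category list cache (null when iterator caching is disabled for this run)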
+    sParams.setClCache(clCache);
+    for (String dim : dimension) {
+      sParams.addFacetRequest(new PerDimCountFacetRequest(
+          new CategoryPath(dim), 10));
+    }
+    FacetsAccumulator acc = new StandardFacetsAccumulator(sParams, reader, taxo);
+    
+    // no point testing this in complement mode, since in that mode all facets are counted anyway
+    acc.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
+
+    List<FacetResult> results = acc.accumulate(allDocs);
+    assertEquals("Wrong #results", dimension.length, results.size());
+
+    for (int i = 0; i < results.size(); i++) {
+      FacetResult res = results.get(i);
+      assertEquals("wrong num-descendants for dimension " + dimension[i],
+          expectedNumDescendants[i], res.getNumValidDescendants());
+      FacetResultNode resNode = res.getFacetResultNode();
+      assertEquals("wrong value for dimension " + dimension[i],
+          expectedValue[i], (int) resNode.getValue());
+    }
+  }
+
+  private void populateIndex(FacetIndexingParams iParams, Directory indexDir,
+      Directory taxoDir) throws Exception {
+    RandomIndexWriter writer = new RandomIndexWriter(random, indexDir, 
+        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
+    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+
+    for (CategoryPath[] categories : perDocCategories) {
+      writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)
+          .setCategoryPaths(Arrays.asList(categories)).build(
+              new Document()));
+
+    }
+    taxoWriter.commit();
+    writer.commit();
+    taxoWriter.close();
+    writer.close();
+  }
+
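+  /** A CountFacetRequest whose category list iterator yields only ordinals under the requested dimension. */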
+  private class PerDimCountFacetRequest extends CountFacetRequest {
+    
+    public PerDimCountFacetRequest(CategoryPath path, int num) {
+      super(path, num);
+    }
+
+    @Override
+    public CategoryListIterator createCategoryListIterator(IndexReader reader, 
+        TaxonomyReader taxo, FacetSearchParams sParams, int partition) throws IOException {
+      // categories of certain dimension only
+      return new PerDimensionCLI(taxo, super.createCategoryListIterator(
+          reader, taxo, sParams, partition), getCategoryPath());
+    }
+    
+    /** Overridden just to verify that only the specified facets are iterated. */
+    @Override
+    public FacetResultsHandler createFacetResultsHandler(
+        TaxonomyReader taxonomyReader) {
+      return new TopKFacetResultsHandler(taxonomyReader, this) {
+        @Override
+        public IntermediateFacetResult fetchPartitionResult(
+            FacetArrays facetArrays, int offset) throws IOException {
+          final IntermediateFacetResult res = super.fetchPartitionResult(facetArrays, offset);
+          if (countForbiddenDimension!=null) {
+            int ord = taxonomyReader.getOrdinal(new CategoryPath(countForbiddenDimension));
+            assertEquals("Should not have accumulated for dimension '"+countForbiddenDimension+"'!",0,facetArrays.getIntArray()[ord]);
+          }
+          return res;
+        }
+      };
+    }
+  }
+
+  /**
+   * a CLI which filters another CLI for the dimension of the provided
+   * category-path
+   */
+  private static class PerDimensionCLI implements CategoryListIterator {
+    private final CategoryListIterator superCLI;
+    private final int[] parentArray;
+    private final int parentOrdinal;
+
+    PerDimensionCLI(TaxonomyReader taxo, CategoryListIterator superCLI,
+        CategoryPath requestedPath) throws IOException {
+      this.superCLI = superCLI;
+      if (requestedPath == null) {
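+        // no path requested: fall back to the taxonomy root ordinal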
+        parentOrdinal = 0;
+      } else {
+        CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
+        parentOrdinal = taxo.getOrdinal(cp);
+      }
+      parentArray = taxo.getParentArray();
+    }
+
+    public boolean init() throws IOException {
+      return superCLI.init();
+    }
+
+    public long nextCategory() throws IOException {
+      long next;
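+      // the wrapped iterator signals exhaustion with a value larger than Integer.MAX_VALUE;
+      // skip any ordinal that does not belong to the requested dimension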
+      while ((next = superCLI.nextCategory()) <= Integer.MAX_VALUE
+          && !isInDimension((int) next)) {
+      }
+
+      return next;
+    }
+
+    /** Walk up the parents chain looking for the requested dimension's ordinal, i.e. check whether the ordinal belongs to that dimension. */
+    private boolean isInDimension(int ordinal) {
+      while (ordinal > 0) {
+        if (ordinal == parentOrdinal) {
+          return true;
+        }
+        ordinal = parentArray[ordinal];
+      }
+      return false;
+    }
+
+    public boolean skipTo(int docId) throws IOException {
+      return superCLI.skipTo(docId);
+    }
+  }
+}
\ No newline at end of file