1 package org.apache.lucene.facet;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.HashSet;
12 import org.apache.lucene.analysis.Analyzer;
13 import org.apache.lucene.analysis.MockAnalyzer;
14 import org.apache.lucene.analysis.MockTokenizer;
15 import org.apache.lucene.document.Document;
16 import org.apache.lucene.document.Field;
17 import org.apache.lucene.document.Field.Index;
18 import org.apache.lucene.document.Field.Store;
19 import org.apache.lucene.document.Field.TermVector;
20 import org.apache.lucene.index.CorruptIndexException;
21 import org.apache.lucene.index.IndexReader;
22 import org.apache.lucene.index.IndexWriterConfig;
23 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
24 import org.apache.lucene.index.RandomIndexWriter;
25 import org.apache.lucene.index.Term;
26 import org.apache.lucene.index.TermDocs;
27 import org.apache.lucene.index.TermEnum;
28 import org.apache.lucene.search.IndexSearcher;
29 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.IOUtils;
32 import org.apache.lucene.util.LuceneTestCase;
33 import org.apache.lucene.util._TestUtil;
34 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
35 import org.apache.lucene.facet.index.params.CategoryListParams;
36 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
37 import org.apache.lucene.facet.index.params.FacetIndexingParams;
38 import org.apache.lucene.facet.search.params.FacetRequest;
39 import org.apache.lucene.facet.search.params.FacetSearchParams;
40 import org.apache.lucene.facet.search.results.FacetResult;
41 import org.apache.lucene.facet.search.results.FacetResultNode;
42 import org.apache.lucene.facet.taxonomy.CategoryPath;
43 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
44 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
45 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
46 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
47 import org.junit.AfterClass;
48 import org.junit.BeforeClass;
/*
51 * Licensed to the Apache Software Foundation (ASF) under one or more
52 * contributor license agreements. See the NOTICE file distributed with
53 * this work for additional information regarding copyright ownership.
54 * The ASF licenses this file to You under the Apache License, Version 2.0
55 * (the "License"); you may not use this file except in compliance with
56 * the License. You may obtain a copy of the License at
58 * http://www.apache.org/licenses/LICENSE-2.0
60 * Unless required by applicable law or agreed to in writing, software
61 * distributed under the License is distributed on an "AS IS" BASIS,
62 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
63 * See the License for the specific language governing permissions and
64 * limitations under the License.
*/
67 /** Base faceted search test. */
68 public abstract class FacetTestBase extends LuceneTestCase {
70 /** Holds a search and taxonomy Directories pair. */
71 private static final class SearchTaxoDirPair {
// The content index directory and its matching taxonomy directory, kept together
// so both can be cached per partition size and closed as one unit (see afterClass).
72 Directory searchDir, taxoDir;
73 SearchTaxoDirPair() {}
// NOTE(review): the closing brace of this inner class is on a line elided from this listing.
// Cache of Directory pairs keyed by facet partition size, so repeated initIndex()
// calls with the same partition size reuse the already-built indexes.
76 private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
// Temp directory for on-disk indexes; set once in beforeClassFacetTestBase().
77 private static File TEST_DIR;
79 /** Documents text field. */
80 protected static final String CONTENT_FIELD = "content";
82 /** taxonomy Reader for the test. */
83 protected TaxonomyReader taxoReader;
85 /** Index Reader for the test. */
86 protected IndexReader indexReader;
88 /** Searcher for the test. */
89 protected IndexSearcher searcher;
// One-time class setup: creates the temp dir and the per-partition-size cache.
// NOTE(review): the @BeforeClass annotation is expected on the elided preceding line
// (org.junit.BeforeClass is imported above) — confirm against the full file.
92 public static void beforeClassFacetTestBase() throws Exception {
93 TEST_DIR = _TestUtil.getTempDir("facets");
94 dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
// One-time class teardown: closes every cached search/taxonomy Directory pair.
// NOTE(review): the @AfterClass annotation is expected on an elided preceding line
// (org.junit.AfterClass is imported above) — confirm against the full file.
98 public static void afterClassFacetTestBase() throws Exception {
99 for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
// IOUtils.close() closes both, suppressing none — first exception wins.
100 IOUtils.close(pair.searchDir, pair.taxoDir);
104 /** documents text (for the text field). */
// Two default documents; numDocsToIndex() derives the default doc count from this
// array's length, and getContent(doc) indexes into it.
105 private static final String[] DEFAULT_CONTENT = {
106 "the white car is the one I want.",
107 "the white dog does not belong to anyone.",
110 /** Facets: facets[D][F] == category-path no. F for document no. D. */
// Both docs share root/a/f1; f2 and f3 are unique per doc, giving known
// ground-truth counts for the facet assertions below.
111 private static final CategoryPath[][] DEFAULT_CATEGORIES = {
112 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
113 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
116 /** categories to be added to specified doc */
// Hook for subclasses; default maps doc ordinal to the DEFAULT_CATEGORIES row.
117 protected List<CategoryPath> getCategories(int doc) {
118 return Arrays.asList(DEFAULT_CATEGORIES[doc]);
121 /** Number of documents to index */
// Default is the size of the built-in content set; subclasses may override
// together with getContent()/getCategories() to index more docs.
122 protected int numDocsToIndex() {
123 return DEFAULT_CONTENT.length;
126 /** content to be added to specified doc */
127 protected String getContent(int doc) {
128 return DEFAULT_CONTENT[doc];
131 /** Prepare index (in RAM) with single partition */
// MAX_VALUE partition size effectively means "no partitioning".
132 protected final void initIndex() throws Exception {
133 initIndex(Integer.MAX_VALUE);
136 /** Prepare index (in RAM) with some documents and some facets */
// Delegates with forceDisk=false, i.e. a random in-memory-style Directory.
137 protected final void initIndex(int partitionSize) throws Exception {
138 initIndex(partitionSize, false);
141 /** Prepare index (in RAM/Disk) with some documents and some facets */
// Builds (or reuses from dirsPerPartitionSize) a search index + taxonomy for the
// given facet partition size, then opens taxoReader/indexReader/searcher on them.
// NOTE(review): this listing elides several control-flow lines (e.g. the null-check
// around building a new pair, the forceDisk if/else, the VERBOSE guard around the
// println, and the commit/close lines near "(taxonomy prior to search index)") —
// comments below describe only what is visible.
142 protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
144 System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
// Reuse cached directories for this partition size when available.
147 SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
149 pair = new SearchTaxoDirPair();
// forceDisk branch: real FSDirectory under the shared temp dir ...
151 pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
152 pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
// ... else branch: random test Directory (typically RAM-based).
154 pair.searchDir = newDirectory();
155 pair.taxoDir = newDirectory();
158 RandomIndexWriter iw = new RandomIndexWriter(random, pair.searchDir, getIndexWriterConfig(getAnalyzer()));
159 TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
161 populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
163 // commit changes (taxonomy prior to search index for consistency)
// Cache the freshly-built pair for subsequent tests with the same partition size.
169 dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
172 // prepare for searching
173 taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
174 indexReader = IndexReader.open(pair.searchDir);
175 searcher = newSearcher(indexReader);
178 /** Returns indexing params for the main index */
// Overridable so subclasses can tweak merge policy, RAM buffer, etc.
179 protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
180 return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
183 /** Returns a default facet indexing params */
// Anonymous subclass pinning the partition size to partSize.
// NOTE(review): the overridden method's body (presumably "return partSize;"), the
// @Override annotation and closing braces are on lines elided from this listing.
184 protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
185 return new DefaultFacetIndexingParams() {
187 protected int fixedPartitionSize() {
194 * Faceted Search Params for the test.
195 * Sub classes should override in order to test with different faceted search params.
// Single-partition (MAX_VALUE) variant; mirrors initIndex()'s default.
197 protected FacetSearchParams getFacetedSearchParams() {
198 return getFacetedSearchParams(Integer.MAX_VALUE);
202 * Faceted Search Params with specified partition size.
203 * @see #getFacetedSearchParams()
// Search params must be built over the same indexing params used at index time,
// hence the getFacetIndexingParams(partitionSize) call.
// NOTE(review): the "return res;" line is elided from this listing.
205 protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
206 FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
211 * Populate the test index+taxonomy for this test.
212 * <p>Subclasses can override this to test different scenarios
// Indexes numDocsToIndex() docs via indexDoc(), then adds+deletes one extra doc so
// every test also exercises an index containing deletions.
214 protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
215 throws IOException, CorruptIndexException {
216 // add test documents
217 int numDocsToIndex = numDocsToIndex();
218 for (int doc=0; doc<numDocsToIndex; doc++) {
219 indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
222 // also add a document that would be deleted, so that all tests are also working against deletions in the index
// Unique content token lets us delete exactly this doc by term.
223 String content4del = "ContentOfDocToDelete";
224 indexDoc(iParams, iw, taxo, content4del, getCategories(0));
225 iw.commit(); // commit it
226 iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
229 /** Close all indexes */
// Counterpart of initIndex(); Directories themselves stay open (cached) and are
// closed in afterClassFacetTestBase().
230 protected void closeAll() throws Exception {
231 // close and nullify everything
232 IOUtils.close(taxoReader, indexReader, searcher);
239 * Analyzer to use for the test.
240 * Sub classes should override in order to test with different analyzer.
// Whitespace tokenizer, no lowercasing — keeps the content4del deletion term exact.
242 protected Analyzer getAnalyzer() {
243 return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
246 /** convenience method: convert sub results to an array */
// NOTE(review): the loop body (presumably "a.add(frn);") is on a line elided from
// this listing — confirm against the full file.
247 protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
248 ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
249 for (FacetResultNode frn : parentRes.getSubResults()) {
252 return a.toArray(new FacetResultNode[0]);
255 /** utility Create a dummy document with specified categories and content */
// Builds one Document carrying both the category fields (via CategoryDocumentBuilder,
// which also feeds the TaxonomyWriter) and the analyzed content field.
// NOTE(review): the builder.build(d) and iw.addDocument(d) calls appear to be on
// lines elided from this listing — confirm against the full file.
256 protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
257 TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
258 CorruptIndexException {
259 Document d = new Document();
260 CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
261 builder.setCategoryPaths(categories);
// Stored + analyzed, no term vectors; field name matches CONTENT_FIELD ("content").
263 d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
267 /** Build the "truth" with ALL the facets enumerating indexes content. */
// Computes expected facet counts directly from the index: for every category list,
// walks the drill-down terms with TermEnum/TermDocs and counts docs per category
// path, skipping deleted docs. Used as the oracle for assertCountsAndCardinality.
// NOTE(review): this listing elides the TermEnum advance/termination lines, the
// per-term doc-counting lines around the isDeleted() check, and the close() calls —
// comments below describe only what is visible.
268 protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
269 FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
// Delimiter separating path components inside an indexed category term.
270 String delim = String.valueOf(iParams.getFacetDelimChar());
271 Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
272 HashSet<Term> handledTerms = new HashSet<Term>();
273 for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
274 Term baseTerm = clp.getTerm().createTerm("");
275 if (!handledTerms.add(baseTerm)) {
276 continue; // already handled this term (for another list)
// Seek the enumeration to the first term of this field.
278 TermEnum te = indexReader.terms(baseTerm);
281 if (!t.field().equals(baseTerm.field())) {
282 break; // hit a different field
284 TermDocs tp = indexReader.termDocs(t);
287 if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
// Reconstruct the CategoryPath by splitting the term text on the facet delimiter.
291 res.put(new CategoryPath(t.text().split(delim)), cnt);
297 /** Validate counts for returned facets, and that there are not too many results */
// Entry point: checks each FacetResult's tree against the truth map, bounded by the
// request's numResults. Delegates per-node work to the private overload below.
298 protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
299 for (FacetResult fr : facetResults) {
300 FacetResultNode topResNode = fr.getFacetResultNode();
301 FacetRequest freq = fr.getFacetRequest();
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
303 System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
305 assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
309 /** Validate counts for returned facets, and that there are not too many results */
// Recursive check: each node has at most reqNumResults children, and every child's
// value matches the truth count for its label.
310 private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
311 int actualNumResults = resNode.getNumSubResults();
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
313 System.out.println("NumResults: " + actualNumResults);
315 assertTrue("Too many results!", actualNumResults <= reqNumResults);
316 for (FacetResultNode subRes : resNode.getSubResults()) {
317 assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
318 assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
322 /** Validate results equality */
// Compares the value-only string renderings (residue/descendant noise stripped by
// resStringValueOnly) and throws the retryable NotSameResultError on mismatch.
323 protected static void assertSameResults(List<FacetResult> expected,
324 List<FacetResult> actual) {
325 String expectedResults = resStringValueOnly(expected);
326 String actualResults = resStringValueOnly(actual);
327 if (!expectedResults.equals(actualResults)) {
328 System.err.println("Results are not the same!");
329 System.err.println("Expected:\n" + expectedResults);
330 System.err.println("Actual" + actualResults);
// Error (not AssertionError) so callers can catch exactly this failure and retry.
331 throw new NotSameResultError();
335 /** exclude the residue and numDecendants because it is incorrect in sampling */
// Renders each FacetResult on its own line, then regex-strips the "Residue:..." and
// "Num valid Descendants..." fragments, which are not exact under sampling.
336 private static final String resStringValueOnly(List<FacetResult> results) {
337 StringBuilder sb = new StringBuilder();
338 for (FacetResult facetRes : results) {
339 sb.append(facetRes.toString()).append('\n');
341 return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
344 /** Special Error class for ability to ignore only this error and retry... */
// Distinct Error subtype thrown by assertSameResults so a caller can catch
// precisely this condition (and nothing else) to retry a flaky comparison.
345 public static class NotSameResultError extends Error {
346 public NotSameResultError() {
347 super("Results are not the same!");