lucene-java-3.4.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/FacetTestBase.java

   1 package org.apache.lucene.facet;
   2
   3 import java.io.File;
   4 import java.io.IOException;
   5 import java.util.ArrayList;
   6 import java.util.Arrays;
   7 import java.util.HashMap;
   8 import java.util.HashSet;
   9 import java.util.List;
  10 import java.util.Map;
  11
  12 import org.apache.lucene.DocumentBuilder.DocumentBuilderException;
  13 import org.apache.lucene.analysis.Analyzer;
  14 import org.apache.lucene.analysis.MockAnalyzer;
  15 import org.apache.lucene.analysis.MockTokenizer;
  16 import org.apache.lucene.document.Document;
  17 import org.apache.lucene.document.Field;
  18 import org.apache.lucene.document.Field.Index;
  19 import org.apache.lucene.document.Field.Store;
  20 import org.apache.lucene.document.Field.TermVector;
  21 import org.apache.lucene.index.CorruptIndexException;
  22 import org.apache.lucene.index.IndexReader;
  23 import org.apache.lucene.index.IndexWriterConfig;
  24 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  25 import org.apache.lucene.index.RandomIndexWriter;
  26 import org.apache.lucene.index.Term;
  27 import org.apache.lucene.index.TermDocs;
  28 import org.apache.lucene.index.TermEnum;
  29 import org.apache.lucene.search.IndexSearcher;
  30 import org.apache.lucene.store.Directory;
  31
  32 import org.apache.lucene.util.LuceneTestCase;
  33 import org.apache.lucene.util._TestUtil;
  34 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
  35 import org.apache.lucene.facet.index.params.CategoryListParams;
  36 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
  37 import org.apache.lucene.facet.index.params.FacetIndexingParams;
  38 import org.apache.lucene.facet.search.params.FacetRequest;
  39 import org.apache.lucene.facet.search.params.FacetSearchParams;
  40 import org.apache.lucene.facet.search.results.FacetResult;
  41 import org.apache.lucene.facet.search.results.FacetResultNode;
  42 import org.apache.lucene.facet.taxonomy.CategoryPath;
  43 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
  44 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
  45 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
  46 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
  47
  48 /**
  49  * Licensed to the Apache Software Foundation (ASF) under one or more
  50  * contributor license agreements.  See the NOTICE file distributed with
  51  * this work for additional information regarding copyright ownership.
  52  * The ASF licenses this file to You under the Apache License, Version 2.0
  53  * (the "License"); you may not use this file except in compliance with
  54  * the License.  You may obtain a copy of the License at
  55  *
  56  *     http://www.apache.org/licenses/LICENSE-2.0
  57  *
  58  * Unless required by applicable law or agreed to in writing, software
  59  * distributed under the License is distributed on an "AS IS" BASIS,
  60  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  61  * See the License for the specific language governing permissions and
  62  * limitations under the License.
  63  */
  64
  65 /** Base faceted search test. */
  66 public abstract class FacetTestBase extends LuceneTestCase {
  67
  68   /** Documents text field. */
  69   protected static final String CONTENT_FIELD = "content";
  70
  71   /** Directory for the index */
  72   protected Directory indexDir;
  73
  74   /** Directory for the taxonomy */
  75   protected Directory taxoDir;
  76
  77   /** taxonomy Reader for the test. */
  78   protected TaxonomyReader taxoReader;
  79
  80   /** Index Reader for the test. */
  81   protected IndexReader indexReader;
  82
  83   /** Searcher for the test. */
  84   protected IndexSearcher searcher;
  85
  86   /** documents text (for the text field). */
  87   private static final String[] DEFAULT_CONTENT = {
  88       "the white car is the one I want.",
  89       "the white dog does not belong to anyone.",
  90   };
  91
  92   /** Facets: facets[D][F] == category-path no. F for document no. D. */
  93   private static final CategoryPath[][] DEFAULT_CATEGORIES = {
  94       { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
  95       { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
  96   };
  97
  98   /** categories to be added to specified doc */
  99   protected List<CategoryPath> getCategories(int doc) {
 100     return Arrays.asList(DEFAULT_CATEGORIES[doc]);
 101   }
 102
 103   /** Number of documents to index */
 104   protected int numDocsToIndex() {
 105     return DEFAULT_CONTENT.length;
 106   }
 107
 108   /** content to be added to specified doc */
 109   protected String getContent(int doc) {
 110     return DEFAULT_CONTENT[doc];
 111   }
 112
 113   /** Prepare index (in RAM) with single partition */
 114   protected final void initIndex() throws Exception {
 115     initIndex(Integer.MAX_VALUE);
 116   }
 117
 118   /** Prepare index (in RAM) with some documents and some facets */
 119   protected final void initIndex(int partitionSize) throws Exception {
 120     initIndex(partitionSize, false);
 121   }
 122
 123   /** Prepare index (in RAM/Disk) with some documents and some facets */
 124   protected final void initIndex(int partitionSize, boolean onDisk) throws Exception {
 125     if (VERBOSE) {
 126       System.out.println("Partition Size: " + partitionSize+"  onDisk: "+onDisk);
 127     }
 128
 129     if (onDisk) {
 130       File indexFile = _TestUtil.getTempDir("index");
 131       indexDir = newFSDirectory(indexFile);
 132       taxoDir = newFSDirectory(new File(indexFile,"facets"));
 133     } else {
 134       indexDir = newDirectory();
 135       taxoDir = newDirectory();
 136     }
 137
 138     RandomIndexWriter iw = new RandomIndexWriter(random, indexDir, getIndexWriterConfig(getAnalyzer()));
 139     TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);
 140
 141     populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
 142
 143     // commit changes (taxonomy prior to search index for consistency)
 144     taxo.commit();
 145     iw.commit();
 146     taxo.close();
 147     iw.close();
 148
 149     // prepare for searching
 150     taxoReader = new LuceneTaxonomyReader(taxoDir);
 151     indexReader = IndexReader.open(indexDir);
 152     searcher = newSearcher(indexReader);
 153   }
 154
 155   /** Returns indexing params for the main index */
 156   protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
 157     return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
 158   }
 159
 160   /** Returns a default facet indexing params */
 161   protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
 162     return new DefaultFacetIndexingParams() {
 163       @Override
 164       protected int fixedPartitionSize() {
 165         return partSize;
 166       }
 167     };
 168   }
 169
 170   /**
 171    * Faceted Search Params for the test.
 172    * Sub classes should override in order to test with different faceted search params.
 173    */
 174   protected FacetSearchParams getFacetedSearchParams() {
 175     return getFacetedSearchParams(Integer.MAX_VALUE);
 176   }
 177
 178   /**
 179    * Faceted Search Params with specified partition size.
 180    * @see #getFacetedSearchParams()
 181    */
 182   protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
 183     FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
 184     return res;
 185   }
 186
 187   /**
 188    * Populate the test index+taxonomy for this test.
 189    * <p>Subclasses can override this to test different scenarios
 190    */
 191   protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
 192       throws IOException, DocumentBuilderException, CorruptIndexException {
 193     // add test documents
 194     int numDocsToIndex = numDocsToIndex();
 195     for (int doc=0; doc<numDocsToIndex; doc++) {
 196       indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
 197     }
 198
 199     // also add a document that would be deleted, so that all tests are also working against deletions in the index
 200     String content4del = "ContentOfDocToDelete";
 201     indexDoc(iParams, iw, taxo, content4del, getCategories(0));
 202     iw.commit(); // commit it
 203     iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
 204   }
 205
 206   /** Close all indexes */
 207   protected void closeAll() throws Exception {
 208     // close and nullify everything
 209     taxoReader.close();
 210     taxoReader = null;
 211     indexReader.close();
 212     indexReader = null;
 213     searcher.close();
 214     searcher = null;
 215     indexDir.close();
 216     indexDir = null;
 217     taxoDir.close();
 218     taxoDir = null;
 219   }
 220
 221   /**
 222    * Analyzer to use for the test.
 223    * Sub classes should override in order to test with different analyzer.
 224    */
 225   protected Analyzer getAnalyzer() {
 226     return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
 227   }
 228
 229   /** convenience method: convert sub results to an array */
 230   protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
 231     ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
 232     for (FacetResultNode frn : parentRes.getSubResults()) {
 233       a.add(frn);
 234     }
 235     return a.toArray(new FacetResultNode[0]);
 236   }
 237
 238   /** utility Create a dummy document with specified categories and content */
 239   protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
 240       TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
 241       CorruptIndexException {
 242     Document d = new Document();
 243     CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
 244     builder.setCategoryPaths(categories);
 245     builder.build(d);
 246     d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
 247     iw.addDocument(d);
 248   }
 249
 250   /** Build the "truth" with ALL the facets enumerating indexes content. */
 251   protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
 252     FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
 253     String delim = String.valueOf(iParams.getFacetDelimChar());
 254     Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
 255     HashSet<Term> handledTerms = new HashSet<Term>();
 256     for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
 257       Term baseTerm = clp.getTerm().createTerm("");
 258       if (!handledTerms.add(baseTerm)) {
 259         continue; // already handled this term (for another list)
 260       }
 261       TermEnum te = indexReader.terms(baseTerm);
 262       while (te.next()) {
 263         Term t = te.term();
 264         if (!t.field().equals(baseTerm.field())) {
 265           break; // hit a different field
 266         }
 267         TermDocs tp = indexReader.termDocs(t);
 268         int cnt = 0;
 269         while (tp.next()) {
 270           if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
 271             cnt++;
 272           }
 273         }
 274         res.put(new CategoryPath(t.text().split(delim)), cnt);
 275       }
 276     }
 277     return res;
 278   }
 279
 280   /** Validate counts for returned facets, and that there are not too many results */
 281   protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
 282     for (FacetResult fr : facetResults) {
 283       FacetResultNode topResNode = fr.getFacetResultNode();
 284       FacetRequest freq = fr.getFacetRequest();
 285       if (VERBOSE) {
 286         System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
 287       }
 288       assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
 289     }
 290   }
 291
 292   /** Validate counts for returned facets, and that there are not too many results */
 293   private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth,  FacetResultNode resNode, int reqNumResults) throws Exception {
 294     int actualNumResults = resNode.getNumSubResults();
 295     if (VERBOSE) {
 296       System.out.println("NumResults: " + actualNumResults);
 297     }
 298     assertTrue("Too many results!", actualNumResults <= reqNumResults);
 299     for (FacetResultNode subRes : resNode.getSubResults()) {
 300       assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
 301       assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
 302     }
 303   }
 304
 305   /** Validate results equality */
 306   protected static void assertSameResults(List<FacetResult> expected,
 307                                           List<FacetResult> actual) {
 308     String expectedResults = resStringValueOnly(expected);
 309     String actualResults = resStringValueOnly(actual);
 310     if (!expectedResults.equals(actualResults)) {
 311       System.err.println("Results are not the same!");
 312       System.err.println("Expected:\n" + expectedResults);
 313       System.err.println("Actual" + actualResults);
 314       fail("Results are not the same!");
 315     }
 316   }
 317
 318   /** exclude the residue and numDecendants because it is incorrect in sampling */
 319   private static final String resStringValueOnly(List<FacetResult> results) {
 320     StringBuilder sb = new StringBuilder();
 321     for (FacetResult facetRes : results) {
 322       sb.append(facetRes.toString()).append('\n');
 323     }
 324     return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
 325   }
 326 }