1 package org.apache.lucene.facet;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.HashSet;
12 import org.apache.lucene.analysis.Analyzer;
13 import org.apache.lucene.analysis.MockAnalyzer;
14 import org.apache.lucene.analysis.MockTokenizer;
15 import org.apache.lucene.document.Document;
16 import org.apache.lucene.document.Field;
17 import org.apache.lucene.document.Field.Index;
18 import org.apache.lucene.document.Field.Store;
19 import org.apache.lucene.document.Field.TermVector;
20 import org.apache.lucene.index.CorruptIndexException;
21 import org.apache.lucene.index.IndexReader;
22 import org.apache.lucene.index.IndexWriterConfig;
23 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
24 import org.apache.lucene.index.RandomIndexWriter;
25 import org.apache.lucene.index.Term;
26 import org.apache.lucene.index.TermDocs;
27 import org.apache.lucene.index.TermEnum;
28 import org.apache.lucene.search.IndexSearcher;
29 import org.apache.lucene.store.Directory;
31 import org.apache.lucene.util.IOUtils;
32 import org.apache.lucene.util.LuceneTestCase;
33 import org.apache.lucene.util._TestUtil;
34 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
35 import org.apache.lucene.facet.index.params.CategoryListParams;
36 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
37 import org.apache.lucene.facet.index.params.FacetIndexingParams;
38 import org.apache.lucene.facet.search.params.FacetRequest;
39 import org.apache.lucene.facet.search.params.FacetSearchParams;
40 import org.apache.lucene.facet.search.results.FacetResult;
41 import org.apache.lucene.facet.search.results.FacetResultNode;
42 import org.apache.lucene.facet.taxonomy.CategoryPath;
43 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
44 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
45 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
46 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
47 import org.junit.AfterClass;
48 import org.junit.BeforeClass;
/*
51 * Licensed to the Apache Software Foundation (ASF) under one or more
52 * contributor license agreements. See the NOTICE file distributed with
53 * this work for additional information regarding copyright ownership.
54 * The ASF licenses this file to You under the Apache License, Version 2.0
55 * (the "License"); you may not use this file except in compliance with
56 * the License. You may obtain a copy of the License at
58 * http://www.apache.org/licenses/LICENSE-2.0
60 * Unless required by applicable law or agreed to in writing, software
61 * distributed under the License is distributed on an "AS IS" BASIS,
62 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
63 * See the License for the specific language governing permissions and
64 * limitations under the License.
*/
67 /** Base faceted search test. */
68 public abstract class FacetTestBase extends LuceneTestCase {
70 /** Holds a search and taxonomy Directories pair. */
71 private static final class SearchTaxoDirPair {
// The content index directory and its matching taxonomy directory, kept together
// so both can be cached per partition size and closed as one unit (see afterClass).
72 Directory searchDir, taxoDir;
73 SearchTaxoDirPair() {}
// NOTE(review): the closing brace of this inner class is on a line elided from this listing.
// Cache of Directory pairs keyed by facet partition size, so repeated initIndex()
// calls with the same partition size reuse the already-built indexes.
76 private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
// Temp directory for on-disk indexes; set once in beforeClassFacetTestBase().
77 private static File TEST_DIR;
79 /** Documents text field. */
80 protected static final String CONTENT_FIELD = "content";
82 /** taxonomy Reader for the test. */
83 protected TaxonomyReader taxoReader;
85 /** Index Reader for the test. */
86 protected IndexReader indexReader;
88 /** Searcher for the test. */
89 protected IndexSearcher searcher;
// One-time class setup: creates the temp dir and the per-partition-size cache.
// NOTE(review): the @BeforeClass annotation is expected on the elided preceding line
// (org.junit.BeforeClass is imported above) — confirm against the full file.
92 public static void beforeClassFacetTestBase() throws Exception {
93 TEST_DIR = _TestUtil.getTempDir("facets");
94 dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
// One-time class teardown: closes every cached search/taxonomy Directory pair.
// NOTE(review): the @AfterClass annotation is expected on an elided preceding line
// (org.junit.AfterClass is imported above) — confirm against the full file.
98 public static void afterClassFacetTestBase() throws Exception {
99 for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
// IOUtils.close() closes both, suppressing none — first exception wins.
100 IOUtils.close(pair.searchDir, pair.taxoDir);
104 /** documents text (for the text field). */
// Two default documents; numDocsToIndex() derives the default doc count from this
// array's length, and getContent(doc) indexes into it.
105 private static final String[] DEFAULT_CONTENT = {
106 "the white car is the one I want.",
107 "the white dog does not belong to anyone.",
110 /** Facets: facets[D][F] == category-path no. F for document no. D. */
// Both docs share root/a/f1; f2 and f3 are unique per doc, giving known
// ground-truth counts for the facet assertions below.
111 private static final CategoryPath[][] DEFAULT_CATEGORIES = {
112 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
113 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
116 /** categories to be added to specified doc */
// Hook for subclasses; default maps doc ordinal to the DEFAULT_CATEGORIES row.
117 protected List<CategoryPath> getCategories(int doc) {
118 return Arrays.asList(DEFAULT_CATEGORIES[doc]);
121 /** Number of documents to index */
// Default is the size of the built-in content set; subclasses may override
// together with getContent()/getCategories() to index more docs.
122 protected int numDocsToIndex() {
123 return DEFAULT_CONTENT.length;
126 /** content to be added to specified doc */
127 protected String getContent(int doc) {
128 return DEFAULT_CONTENT[doc];
131 /** Prepare index (in RAM) with single partition */
// MAX_VALUE partition size effectively means "no partitioning".
132 protected final void initIndex() throws Exception {
133 initIndex(Integer.MAX_VALUE);
136 /** Prepare index (in RAM) with some documents and some facets */
// Delegates with forceDisk=false, i.e. a random in-memory-style Directory.
137 protected final void initIndex(int partitionSize) throws Exception {
138 initIndex(partitionSize, false);
141 /** Prepare index (in RAM/Disk) with some documents and some facets */
// Builds (or reuses from dirsPerPartitionSize) a search index + taxonomy for the
// given facet partition size, then opens taxoReader/indexReader/searcher on them.
// NOTE(review): this listing elides several control-flow lines (e.g. the null-check
// around building a new pair, the forceDisk if/else, the VERBOSE guard around the
// println, and the commit/close lines near "(taxonomy prior to search index)") —
// comments below describe only what is visible.
142 protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
144 System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
// Reuse cached directories for this partition size when available.
147 SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
149 pair = new SearchTaxoDirPair();
// forceDisk branch: real FSDirectory under the shared temp dir ...
151 pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
152 pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
// ... else branch: random test Directory (typically RAM-based).
154 pair.searchDir = newDirectory();
155 pair.taxoDir = newDirectory();
158 RandomIndexWriter iw = new RandomIndexWriter(random, pair.searchDir, getIndexWriterConfig(getAnalyzer()));
159 TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
161 populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
163 // commit changes (taxonomy prior to search index for consistency)
// Cache the freshly-built pair for subsequent tests with the same partition size.
169 dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
172 // prepare for searching
173 taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
174 indexReader = IndexReader.open(pair.searchDir);
175 searcher = newSearcher(indexReader);
178 /** Returns indexing params for the main index */
// Overridable so subclasses can tweak merge policy, RAM buffer, etc.
179 protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
180 return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
183 /** Returns a default facet indexing params */
// Anonymous subclass pinning the partition size to partSize.
// NOTE(review): the overridden method's body (presumably "return partSize;"), the
// @Override annotation and closing braces are on lines elided from this listing.
184 protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
185 return new DefaultFacetIndexingParams() {
187 protected int fixedPartitionSize() {
194 * Faceted Search Params for the test.
195 * Sub classes should override in order to test with different faceted search params.
// Single-partition (MAX_VALUE) variant; mirrors initIndex()'s default.
197 protected FacetSearchParams getFacetedSearchParams() {
198 return getFacetedSearchParams(Integer.MAX_VALUE);
202 * Faceted Search Params with specified partition size.
203 * @see #getFacetedSearchParams()
// Search params must be built over the same indexing params used at index time,
// hence the getFacetIndexingParams(partitionSize) call.
// NOTE(review): the "return res;" line is elided from this listing.
205 protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
206 FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
211 * Populate the test index+taxonomy for this test.
212 * <p>Subclasses can override this to test different scenarios
// Indexes numDocsToIndex() docs via indexDoc(), then adds+deletes one extra doc so
// every test also exercises an index containing deletions.
214 protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
215 throws IOException, CorruptIndexException {
216 // add test documents
217 int numDocsToIndex = numDocsToIndex();
218 for (int doc=0; doc<numDocsToIndex; doc++) {
219 indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
222 // also add a document that would be deleted, so that all tests are also working against deletions in the index
// Unique content token lets us delete exactly this doc by term.
223 String content4del = "ContentOfDocToDelete";
224 indexDoc(iParams, iw, taxo, content4del, getCategories(0));
225 iw.commit(); // commit it
226 iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
229 /** Close all indexes */
// Counterpart of initIndex(); Directories themselves stay open (cached) and are
// closed in afterClassFacetTestBase().
230 protected void closeAll() throws Exception {
231 // close and nullify everything
232 IOUtils.close(taxoReader, indexReader, searcher);
239 * Analyzer to use for the test.
240 * Sub classes should override in order to test with different analyzer.
// Whitespace tokenizer, no lowercasing — keeps the content4del deletion term exact.
242 protected Analyzer getAnalyzer() {
243 return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
246 /** convenience method: convert sub results to an array */
// NOTE(review): the loop body (presumably "a.add(frn);") is on a line elided from
// this listing — confirm against the full file.
247 protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
248 ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
249 for (FacetResultNode frn : parentRes.getSubResults()) {
252 return a.toArray(new FacetResultNode[0]);
255 /** utility Create a dummy document with specified categories and content */
// Builds one Document carrying both the category fields (via CategoryDocumentBuilder,
// which also feeds the TaxonomyWriter) and the analyzed content field.
// NOTE(review): the builder.build(d) and iw.addDocument(d) calls appear to be on
// lines elided from this listing — confirm against the full file.
256 protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
257 TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
258 CorruptIndexException {
259 Document d = new Document();
260 CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
261 builder.setCategoryPaths(categories);
// Stored + analyzed, no term vectors; field name matches CONTENT_FIELD ("content").
263 d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
267 /** Build the "truth" with ALL the facets enumerating indexes content. */
// Computes expected facet counts directly from the index: for every category list,
// walks the drill-down terms with TermEnum/TermDocs and counts docs per category
// path, skipping deleted docs. Used as the oracle for assertCountsAndCardinality.
// NOTE(review): this listing elides the TermEnum advance/termination lines, the
// per-term doc-counting lines around the isDeleted() check, and the close() calls —
// comments below describe only what is visible.
268 protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
269 FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
// Delimiter separating path components inside an indexed category term.
270 String delim = String.valueOf(iParams.getFacetDelimChar());
271 Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
272 HashSet<Term> handledTerms = new HashSet<Term>();
273 for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
274 Term baseTerm = clp.getTerm().createTerm("");
275 if (!handledTerms.add(baseTerm)) {
276 continue; // already handled this term (for another list)
// Seek the enumeration to the first term of this field.
278 TermEnum te = indexReader.terms(baseTerm);
281 if (!t.field().equals(baseTerm.field())) {
282 break; // hit a different field
284 TermDocs tp = indexReader.termDocs(t);
287 if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
// Reconstruct the CategoryPath by splitting the term text on the facet delimiter.
291 res.put(new CategoryPath(t.text().split(delim)), cnt);
297 /** Validate counts for returned facets, and that there are not too many results */
// Entry point: checks each FacetResult's tree against the truth map, bounded by the
// request's numResults. Delegates per-node work to the private overload below.
298 protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
299 for (FacetResult fr : facetResults) {
300 FacetResultNode topResNode = fr.getFacetResultNode();
301 FacetRequest freq = fr.getFacetRequest();
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
303 System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
305 assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
309 /** Validate counts for returned facets, and that there are not too many results */
// Recursive check: each node has at most reqNumResults children, and every child's
// value matches the truth count for its label.
310 private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
311 int actualNumResults = resNode.getNumSubResults();
// Presumably guarded by a VERBOSE flag on an elided line — confirm in full file.
313 System.out.println("NumResults: " + actualNumResults);
315 assertTrue("Too many results!", actualNumResults <= reqNumResults);
316 for (FacetResultNode subRes : resNode.getSubResults()) {
317 assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
318 assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
322 /** Validate results equality */
// Compares the value-only string renderings (residue/descendant noise stripped by
// resStringValueOnly) and throws the retryable NotSameResultError on mismatch.
323 protected static void assertSameResults(List<FacetResult> expected,
324 List<FacetResult> actual) {
325 String expectedResults = resStringValueOnly(expected);
326 String actualResults = resStringValueOnly(actual);
327 if (!expectedResults.equals(actualResults)) {
328 System.err.println("Results are not the same!");
329 System.err.println("Expected:\n" + expectedResults);
330 System.err.println("Actual" + actualResults);
// Error (not AssertionError) so callers can catch exactly this failure and retry.
331 throw new NotSameResultError();
335 /** exclude the residue and numDecendants because it is incorrect in sampling */
// Renders each FacetResult on its own line, then regex-strips the "Residue:..." and
// "Num valid Descendants..." fragments, which are not exact under sampling.
336 private static final String resStringValueOnly(List<FacetResult> results) {
337 StringBuilder sb = new StringBuilder();
338 for (FacetResult facetRes : results) {
339 sb.append(facetRes.toString()).append('\n');
341 return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
344 /** Special Error class for ability to ignore only this error and retry... */
// Distinct Error subtype thrown by assertSameResults so a caller can catch
// precisely this condition (and nothing else) to retry a flaky comparison.
345 public static class NotSameResultError extends Error {
346 public NotSameResultError() {
347 super("Results are not the same!");