1 package org.apache.lucene.facet;
4 import java.io.IOException;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.HashSet;
12 import org.apache.lucene.DocumentBuilder.DocumentBuilderException;
13 import org.apache.lucene.analysis.Analyzer;
14 import org.apache.lucene.analysis.MockAnalyzer;
15 import org.apache.lucene.analysis.MockTokenizer;
16 import org.apache.lucene.document.Document;
17 import org.apache.lucene.document.Field;
18 import org.apache.lucene.document.Field.Index;
19 import org.apache.lucene.document.Field.Store;
20 import org.apache.lucene.document.Field.TermVector;
21 import org.apache.lucene.index.CorruptIndexException;
22 import org.apache.lucene.index.IndexReader;
23 import org.apache.lucene.index.IndexWriterConfig;
24 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
25 import org.apache.lucene.index.RandomIndexWriter;
26 import org.apache.lucene.index.Term;
27 import org.apache.lucene.index.TermDocs;
28 import org.apache.lucene.index.TermEnum;
29 import org.apache.lucene.search.IndexSearcher;
30 import org.apache.lucene.store.Directory;
32 import org.apache.lucene.util.LuceneTestCase;
33 import org.apache.lucene.util._TestUtil;
34 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
35 import org.apache.lucene.facet.index.params.CategoryListParams;
36 import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
37 import org.apache.lucene.facet.index.params.FacetIndexingParams;
38 import org.apache.lucene.facet.search.params.FacetRequest;
39 import org.apache.lucene.facet.search.params.FacetSearchParams;
40 import org.apache.lucene.facet.search.results.FacetResult;
41 import org.apache.lucene.facet.search.results.FacetResultNode;
42 import org.apache.lucene.facet.taxonomy.CategoryPath;
43 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
44 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
45 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
46 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
49 * Licensed to the Apache Software Foundation (ASF) under one or more
50 * contributor license agreements. See the NOTICE file distributed with
51 * this work for additional information regarding copyright ownership.
52 * The ASF licenses this file to You under the Apache License, Version 2.0
53 * (the "License"); you may not use this file except in compliance with
54 * the License. You may obtain a copy of the License at
56 * http://www.apache.org/licenses/LICENSE-2.0
58 * Unless required by applicable law or agreed to in writing, software
59 * distributed under the License is distributed on an "AS IS" BASIS,
60 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
61 * See the License for the specific language governing permissions and
62 * limitations under the License.
65 /** Base faceted search test. */
66 public abstract class FacetTestBase extends LuceneTestCase {
68 /** Name of the documents' text field. */
69 protected static final String CONTENT_FIELD = "content";
71 /** Directory for the search index; assigned by initIndex(int, boolean). */
72 protected Directory indexDir;
74 /** Directory for the taxonomy; assigned by initIndex(int, boolean). */
75 protected Directory taxoDir;
77 /** Taxonomy reader for the test; opened over taxoDir by initIndex(int, boolean). */
78 protected TaxonomyReader taxoReader;
80 /** Index reader for the test; opened over indexDir by initIndex(int, boolean). */
81 protected IndexReader indexReader;
83 /** Searcher for the test; created from indexReader by initIndex(int, boolean). */
84 protected IndexSearcher searcher;
86 /** Default document texts (for the text field): DEFAULT_CONTENT[D] is the text of document no. D. */
87 private static final String[] DEFAULT_CONTENT = {
88 "the white car is the one I want.",
89 "the white dog does not belong to anyone.",
// NOTE(review): the closing of the initializer above is not visible in this view.
92 /** Default facets: DEFAULT_CATEGORIES[D][F] == category-path no. F for document no. D. */
93 private static final CategoryPath[][] DEFAULT_CATEGORIES = {
94 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
95 { new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
98 /** categories to be added to specified doc */
99 protected List<CategoryPath> getCategories(int doc) {
100 return Arrays.asList(DEFAULT_CATEGORIES[doc]);
103 /** Number of documents to index */
104 protected int numDocsToIndex() {
105 return DEFAULT_CONTENT.length;
108 /** content to be added to specified doc */
109 protected String getContent(int doc) {
110 return DEFAULT_CONTENT[doc];
113 /** Prepare index (in RAM) with single partition */
114 protected final void initIndex() throws Exception {
115 initIndex(Integer.MAX_VALUE);
118 /** Prepare index (in RAM) with some documents and some facets */
119 protected final void initIndex(int partitionSize) throws Exception {
120 initIndex(partitionSize, false);
123 /** Prepare index and taxonomy directories, populate them with some documents and some facets, and open the readers and searcher used by the test. */
124 protected final void initIndex(int partitionSize, boolean onDisk) throws Exception {
126 System.out.println("Partition Size: " + partitionSize+" onDisk: "+onDisk);
// NOTE(review): the conditions selecting between the two directory setups below are not
// visible in this view - presumably FS directories under a temp dir when onDisk is true,
// newDirectory() otherwise; confirm. Any guard around the print above is likewise not visible.
130 File indexFile = _TestUtil.getTempDir("index");
131 indexDir = newFSDirectory(indexFile);
132 taxoDir = newFSDirectory(new File(indexFile,"facets"));
134 indexDir = newDirectory();
135 taxoDir = newDirectory();
// the index writer uses this test's analyzer; the taxonomy is opened with OpenMode.CREATE
138 RandomIndexWriter iw = new RandomIndexWriter(random, indexDir, getIndexWriterConfig(getAnalyzer()));
139 TaxonomyWriter taxo = new LuceneTaxonomyWriter(taxoDir, OpenMode.CREATE);
141 populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
143 // commit changes (taxonomy prior to search index for consistency)
// NOTE(review): the commit/close statements themselves are not visible in this view.
149 // prepare for searching
150 taxoReader = new LuceneTaxonomyReader(taxoDir);
151 indexReader = IndexReader.open(indexDir);
152 searcher = newSearcher(indexReader);
155 /** Returns indexing params for the main index */
156 protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
157 return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
160 /** Returns default facet indexing params: an anonymous DefaultFacetIndexingParams that declares its own fixedPartitionSize(). */
161 protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
162 return new DefaultFacetIndexingParams() {
// NOTE(review): the body of fixedPartitionSize() is not visible in this view -
// presumably it returns partSize (declared final so the anonymous class can read it); confirm.
164 protected int fixedPartitionSize() {
171 * Faceted Search Params for the test.
172 * Sub classes should override in order to test with different faceted search params.
174 protected FacetSearchParams getFacetedSearchParams() {
175 return getFacetedSearchParams(Integer.MAX_VALUE);
179 * Faceted Search Params with specified partition size.
180 * @see #getFacetedSearchParams()
182 protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
// search params are built on the facet indexing params for the same partition size
183 FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
// NOTE(review): the rest of the body is not visible in this view - presumably returns res; confirm.
188 * Populate the test index+taxonomy for this test.
189 * <p>Subclasses can override this to test different scenarios
191 protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
192 throws IOException, DocumentBuilderException, CorruptIndexException {
193 // add test documents
194 int numDocsToIndex = numDocsToIndex();
195 for (int doc=0; doc<numDocsToIndex; doc++) {
196 indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
199 // also add a document that would be deleted, so that all tests are also working against deletions in the index
200 String content4del = "ContentOfDocToDelete";
201 indexDoc(iParams, iw, taxo, content4del, getCategories(0));
202 iw.commit(); // commit it
203 iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
206 /** Close all indexes. */
207 protected void closeAll() throws Exception {
208 // close and nullify everything
// NOTE(review): the statements of this method are not visible in this view; which
// resources are closed, and in what order, must be confirmed against the full file.
222 * Analyzer to use for the test.
223 * Sub classes should override in order to test with different analyzer.
225 protected Analyzer getAnalyzer() {
226 return new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
229 /** Convenience method: convert the sub results of a node to an array. */
230 protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
231 ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
232 for (FacetResultNode frn : parentRes.getSubResults()) {
// NOTE(review): the loop body is not visible in this view - presumably adds frn to a; confirm.
235 return a.toArray(new FacetResultNode[0]);
238 /** Utility: create a dummy document with the specified categories and content. */
239 protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
240 TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException,
241 CorruptIndexException {
242 Document d = new Document();
243 CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
244 builder.setCategoryPaths(categories);
// NOTE(review): a statement between these lines is not visible in this view.
// NOTE(review): the field name is the literal "content", the same value as CONTENT_FIELD;
// populateIndex deletes by CONTENT_FIELD, so the two must stay in sync.
246 d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO));
// NOTE(review): the remaining statements are not visible - presumably d is added through iw; confirm.
250 /** Build the "truth": counts for ALL the facets, obtained by enumerating the index terms of each category list. */
251 protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
252 FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
253 String delim = String.valueOf(iParams.getFacetDelimChar());
254 Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
255 HashSet<Term> handledTerms = new HashSet<Term>();
256 for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
// empty-text term of the list's field: the starting point for enumerating that field's terms
257 Term baseTerm = clp.getTerm().createTerm("");
258 if (!handledTerms.add(baseTerm)) {
259 continue; // already handled this term (for another list)
261 TermEnum te = indexReader.terms(baseTerm);
// NOTE(review): the enumeration loop header and the declaration of t are not visible in this view.
264 if (!t.field().equals(baseTerm.field())) {
265 break; // hit a different field
267 TermDocs tp = indexReader.termDocs(t);
// NOTE(review): the loop over tp and the declaration/update of cnt are not visible in this view.
270 if (!indexReader.isDeleted(tp.doc())) { // ignore deleted docs
// NOTE(review): String.split takes a regular expression, so delim is interpreted as a regex
// here - confirm the facet delimiter char is never a regex metacharacter.
274 res.put(new CategoryPath(t.text().split(delim)), cnt);
280 /** Validate counts for returned facets, and that there are not too many results: checks the top node of every facet result against its request's number of results. */
281 protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
282 for (FacetResult fr : facetResults) {
283 FacetResultNode topResNode = fr.getFacetResultNode();
284 FacetRequest freq = fr.getFacetRequest();
// NOTE(review): any guard around this print is not visible in this view.
286 System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
288 assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
292 /** Validate counts for returned facets, and that there are not too many results; recurses into every sub result with the same limit. */
293 private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
294 int actualNumResults = resNode.getNumSubResults();
// NOTE(review): any guard around this print is not visible in this view.
296 System.out.println("NumResults: " + actualNumResults);
298 assertTrue("Too many results!", actualNumResults <= reqNumResults);
299 for (FacetResultNode subRes : resNode.getSubResults()) {
// NOTE(review): get(...) is unboxed via intValue() without a null check, so a label missing
// from facetCountsTruth surfaces as a NullPointerException rather than an assertion failure.
300 assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
301 assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
305 /** Validate results equality */
306 protected static void assertSameResults(List<FacetResult> expected,
307 List<FacetResult> actual) {
308 String expectedResults = resStringValueOnly(expected);
309 String actualResults = resStringValueOnly(actual);
310 if (!expectedResults.equals(actualResults)) {
311 System.err.println("Results are not the same!");
312 System.err.println("Expected:\n" + expectedResults);
313 System.err.println("Actual" + actualResults);
314 fail("Results are not the same!");
318 /** exclude the residue and numDecendants because it is incorrect in sampling */
319 private static final String resStringValueOnly(List<FacetResult> results) {
320 StringBuilder sb = new StringBuilder();
321 for (FacetResult facetRes : results) {
322 sb.append(facetRes.toString()).append('\n');
324 return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");