1 package org.apache.lucene.facet.example.multiCL;
4 import java.util.Random;
6 import org.apache.lucene.document.Document;
7 import org.apache.lucene.document.Field;
8 import org.apache.lucene.document.Field.Index;
9 import org.apache.lucene.document.Field.Store;
10 import org.apache.lucene.index.IndexWriter;
11 import org.apache.lucene.index.IndexWriterConfig;
12 import org.apache.lucene.index.Term;
13 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
14 import org.apache.lucene.store.Directory;
15 import org.apache.lucene.store.RAMDirectory;
17 import org.apache.lucene.facet.example.ExampleUtils;
18 import org.apache.lucene.facet.example.simple.SimpleUtils;
19 import org.apache.lucene.facet.index.CategoryDocumentBuilder;
20 import org.apache.lucene.facet.index.params.CategoryListParams;
21 import org.apache.lucene.facet.index.params.FacetIndexingParams;
22 import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
23 import org.apache.lucene.facet.taxonomy.CategoryPath;
24 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
27 * Licensed to the Apache Software Foundation (ASF) under one or more
28 * contributor license agreements. See the NOTICE file distributed with
29 * this work for additional information regarding copyright ownership.
30 * The ASF licenses this file to You under the Apache License, Version 2.0
31 * (the "License"); you may not use this file except in compliance with
32 * the License. You may obtain a copy of the License at
34 * http://www.apache.org/licenses/LICENSE-2.0
36 * Unless required by applicable law or agreed to in writing, software
37 * distributed under the License is distributed on an "AS IS" BASIS,
38 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
39 * See the License for the specific language governing permissions and
40 * limitations under the License.
44 * Sample indexer creates an index, and adds to it sample documents and facets
45 * with multiple CategoryLists specified for different facets, so there are different
46 * category lists for different facets.
48 * @lucene.experimental
50 public class MultiCLIndexer {
52 // Number of documents to index
53 public static int NUM_DOCS = 100;
54 // Number of facets to add per document
55 public static int NUM_FACETS_PER_DOC = 10;
56 // Number of tokens in title
57 public static int TITLE_LENGTH = 5;
58 // Number of tokens in text
59 public static int TEXT_LENGTH = 100;
61 // Lorum ipsum to use as content - this will be tokenized and used for document
63 static String words = "Sed ut perspiciatis unde omnis iste natus error sit "
64 + "voluptatem accusantium doloremque laudantium totam rem aperiam "
65 + "eaque ipsa quae ab illo inventore veritatis et quasi architecto "
66 + "beatae vitae dicta sunt explicabo Nemo enim ipsam voluptatem "
67 + "quia voluptas sit aspernatur aut odit aut fugit sed quia consequuntur "
68 + "magni dolores eos qui ratione voluptatem sequi nesciunt Neque porro "
69 + "quisquam est qui dolorem ipsum quia dolor sit amet consectetur adipisci velit "
70 + "sed quia non numquam eius modi tempora incidunt ut labore et dolore "
71 + "magnam aliquam quaerat voluptatem Ut enim ad minima veniam "
72 + "quis nostrum exercitationem ullam corporis suscipit laboriosam "
73 + "nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure"
74 + "reprehenderit qui in ea voluptate velit esse quam nihil molestiae "
75 + "consequatur vel illum qui dolorem eum fugiat quo voluptas nulla pariatur";
/**
 * Indexing parameters that assign a dedicated category list to each of the
 * category paths "0" through "5". Every list is identified by a
 * {@code Term("$Digits", name)}, where {@code name} is the English name of the
 * digit ("Zero" ... "Five").
 */
public static PerDimensionIndexingParams MULTI_IPARAMS = new PerDimensionIndexingParams();

// Register one category list per digit, in ascending order.
static {
  final String[] digitNames = {"Zero", "One", "Two", "Three", "Four", "Five"};
  for (int digit = 0; digit < digitNames.length; digit++) {
    CategoryPath dimension = new CategoryPath(Integer.toString(digit));
    CategoryListParams listParams =
        new CategoryListParams(new Term("$Digits", digitNames[digit]));
    MULTI_IPARAMS.addCategoryListParams(dimension, listParams);
  }
}
96 * Create an index, and adds to it sample documents and facets.
97 * @param indexDir Directory in which the index should be created.
98 * @param taxoDir Directory in which the taxonomy index should be created.
99 * @throws Exception on error (no detailed exception handling here for sample simplicity
101 public static void index(Directory indexDir, Directory taxoDir)
104 Random random = new Random(2003);
106 String[] docTitles = new String[NUM_DOCS];
107 String[] docTexts = new String[NUM_DOCS];
108 CategoryPath[][] cPaths = new CategoryPath[NUM_DOCS][NUM_FACETS_PER_DOC];
110 String[] tokens = words.split(" ");
111 for (int docNum = 0; docNum < NUM_DOCS; docNum++) {
114 for (int j = 0; j < TITLE_LENGTH; j++) {
115 title = title + tokens[random.nextInt(tokens.length)] + " ";
117 docTitles[docNum] = title;
119 for (int j = 0; j < TEXT_LENGTH; j++) {
120 text = text + tokens[random.nextInt(tokens.length)] + " ";
122 docTexts[docNum] = text;
124 for (int facetNum = 0; facetNum < NUM_FACETS_PER_DOC; facetNum++) {
125 cPaths[docNum][facetNum] = new CategoryPath(Integer
126 .toString(random.nextInt(7)), Integer.toString(random.nextInt(10)));
129 index(indexDir, taxoDir, MULTI_IPARAMS, docTitles, docTexts, cPaths);
133 * More advanced method for specifying custom indexing params, doc texts,
134 * doc titles and category paths.
136 public static void index(Directory indexDir, Directory taxoDir,
137 FacetIndexingParams iParams, String[] docTitles,
138 String[] docTexts, CategoryPath[][] cPaths) throws Exception {
139 // create and open an index writer
140 IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(
141 ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer).setOpenMode(OpenMode.CREATE));
142 // create and open a taxonomy writer
143 DirectoryTaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
144 index(iw, taxo, iParams, docTitles, docTexts, cPaths);
148 * More advanced method for specifying custom indexing params, doc texts,
149 * doc titles and category paths.
151 * Create an index, and adds to it sample documents and facets.
153 * on error (no detailed exception handling here for sample
156 public static void index(IndexWriter iw, DirectoryTaxonomyWriter taxo,
157 FacetIndexingParams iParams, String[] docTitles,
158 String[] docTexts, CategoryPath[][] cPaths) throws Exception {
160 // loop over sample documents
162 int nFacetsAdded = 0;
163 for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
164 List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(cPaths[docNum]);
166 // we do not alter indexing parameters!
167 // a category document builder will add the categories to a document
168 // once build() is called
169 CategoryDocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(
170 taxo, iParams).setCategoryPaths(facetList);
172 // create a plain Lucene document and add some regular Lucene fields
174 Document doc = new Document();
175 doc.add(new Field(SimpleUtils.TITLE, docTitles[docNum], Store.YES, Index.ANALYZED));
176 doc.add(new Field(SimpleUtils.TEXT, docTexts[docNum], Store.NO, Index.ANALYZED));
178 // finally add the document to the index
179 categoryDocBuilder.build(doc);
183 nFacetsAdded += facetList.size();
187 // we commit changes to the taxonomy index prior to committing them to
189 // this is important, so that all facets referred to by documents in the
191 // will indeed exist in the taxonomy index.
195 // close the taxonomy index and the index - all modifications are
196 // now safely in the provided directories: indexDir and taxoDir.
200 ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
201 + nFacetsAdded + " facets.");
204 public static void main(String[] args) throws Exception {
205 index(new RAMDirectory(), new RAMDirectory());