+++ /dev/null
-package org.apache.lucene.facet.index;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import org.apache.lucene.DocumentBuilder;
-import org.apache.lucene.facet.index.attributes.CategoryAttribute;
-import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
-import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
-import org.apache.lucene.facet.index.params.FacetIndexingParams;
-import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
-import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
-import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
-import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
-import org.apache.lucene.facet.index.streaming.CountingListTokenizer;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * A utility class which attaches {@link CategoryPath}s or
- * {@link CategoryAttribute}s to a given document using a taxonomy.<br>
- * Construction can be done with either a given {@link FacetIndexingParams} or
- * the default implementation {@link DefaultFacetIndexingParams}.<br>
- * A CategoryDocumentBuilder can be reused by repeatedly setting the categories
- * and building the document. Categories are provided either as
- * {@link CategoryAttribute} elements through {@link #setCategories(Iterable)},
- * or as {@link CategoryPath} elements through
- * {@link #setCategoryPaths(Iterable)}. Each call to one of the "set" methods
- * discards the fields prepared by the previous call.
- * <p>
- * Note that both {@link #setCategories(Iterable)} and
- * {@link #setCategoryPaths(Iterable)} return this
- * {@link CategoryDocumentBuilder}, allowing the following pattern: {@code new
- * CategoryDocumentBuilder(taxonomy,
- * params).setCategories(categories).build(doc)}.
- * <p>
- * Instances keep mutable per-document state ({@link #fieldList} and
- * {@link #categoriesMap}) and this class performs no synchronization of its
- * own.
- *
- * @lucene.experimental
- */
-public class CategoryDocumentBuilder implements DocumentBuilder {
-
-  /**
-   * A {@link TaxonomyWriter} for adding categories and retrieving their
-   * ordinals.
-   */
-  protected final TaxonomyWriter taxonomyWriter;
-
-  /**
-   * Parameters to be used when indexing categories.
-   */
-  protected final FacetIndexingParams indexingParams;
-
-  /**
-   * The fields prepared by the last call to {@link #setCategories(Iterable)}
-   * (one field per category-list field name) and added to the document
-   * during {@link CategoryDocumentBuilder#build(Document)}.
-   */
-  protected final ArrayList<Field> fieldList = new ArrayList<Field>();
-
-  /**
-   * Maps a field name to the categories which should be indexed under that
-   * field. Cleared and refilled by {@link #fillCategoriesMap(Iterable)}.
-   */
-  protected Map<String, List<CategoryAttribute>> categoriesMap;
-
-  /**
-   * Creating a facets document builder with default facet indexing
-   * parameters.<br>
-   * See:
-   * {@link #CategoryDocumentBuilder(TaxonomyWriter, FacetIndexingParams)}
-   *
-   * @param taxonomyWriter
-   *            to which new categories will be added, as well as translating
-   *            known categories to ordinals
-   * @throws IOException
-   *             declared by the signature; propagated from the delegated
-   *             constructor
-   */
-  public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter)
-      throws IOException {
-    this(taxonomyWriter, new DefaultFacetIndexingParams());
-  }
-
-  /**
-   * Creating a facets document builder with a given facet indexing parameters
-   * object.<br>
-   *
-   * @param taxonomyWriter
-   *            to which new categories will be added, as well as translating
-   *            known categories to ordinals
-   * @param params
-   *            holds all parameters the indexing process should use such as
-   *            category-list parameters
-   * @throws IOException
-   *             declared by the signature; nothing in this constructor's
-   *             body throws it
-   */
-  public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter,
-      FacetIndexingParams params) throws IOException {
-    this.taxonomyWriter = taxonomyWriter;
-    this.indexingParams = params;
-    this.categoriesMap = new HashMap<String, List<CategoryAttribute>>();
-  }
-
-  /**
-   * Set the categories of the document builder from an {@link Iterable} of
-   * {@link CategoryPath} objects.
-   *
-   * @param categoryPaths
-   *            An iterable of CategoryPath objects which holds the categories
-   *            (facets) which will be added to the document at
-   *            {@link #build(Document)}. If {@code null}, the previously
-   *            prepared fields are discarded and nothing will be added.
-   * @return This CategoryDocumentBuilder, to enable this one line call:
-   *         {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
-   *         {@link #setCategoryPaths(Iterable)}.{@link #build(Document)}.
-   * @throws IOException
-   *             propagated from {@link #setCategories(Iterable)}
-   */
-  public CategoryDocumentBuilder setCategoryPaths(
-      Iterable<CategoryPath> categoryPaths) throws IOException {
-    if (categoryPaths == null) {
-      // nothing to wrap: just drop whatever the previous call prepared
-      fieldList.clear();
-      return this;
-    }
-    return setCategories(new CategoryAttributesIterable(categoryPaths));
-  }
-
-  /**
-   * Set the categories of the document builder from an {@link Iterable} of
-   * {@link CategoryAttribute} objects.
-   *
-   * @param categories
-   *            An iterable of {@link CategoryAttribute} objects which holds
-   *            the categories (facets) which will be added to the document at
-   *            {@link #build(Document)}. If {@code null}, the previously
-   *            prepared fields are discarded and nothing will be added.
-   * @return This CategoryDocumentBuilder, to enable this one line call:
-   *         {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
-   *         {@link #setCategories(Iterable)}.{@link #build(Document)}.
-   * @throws IOException
-   *             propagated from {@link #fillCategoriesMap(Iterable)} or
-   *             {@link #getCategoryTokenizer(TokenStream)}
-   */
-  public CategoryDocumentBuilder setCategories(
-      Iterable<CategoryAttribute> categories) throws IOException {
-    // always start from an empty field list, even for null input
-    fieldList.clear();
-    if (categories == null) {
-      return this;
-    }
-
-    // get field-name to a list of facets mapping as different facets could
-    // be added to different category-lists on different fields
-    fillCategoriesMap(categories);
-
-    // creates a different stream for each different field
-    for (Entry<String, List<CategoryAttribute>> e : categoriesMap
-        .entrySet()) {
-      // create a category attributes stream for the array of facets
-      CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(
-          e.getValue());
-
-      // Set a suitable {@link TokenStream} using the parents stream,
-      // followed by CategoryListTokenizer and CategoryTokenizer
-      // composition (the ordering of the last two is not mandatory).
-      // The parents stream is kept as a plain TokenStream: that is what
-      // getParentsStream() is declared to return and what
-      // getCategoryListTokenizer() accepts, so an overriding subclass may
-      // return any TokenStream without triggering a ClassCastException.
-      TokenStream parentsStream = getParentsStream(categoryAttributesStream);
-      CategoryListTokenizer categoryListTokenizer = getCategoryListTokenizer(parentsStream);
-      CategoryTokenizer stream = getCategoryTokenizer(categoryListTokenizer);
-
-      // Finally creating a suitable field with stream and adding it to a
-      // master field-list, used during the build process (see
-      // build(Document))
-      fieldList.add(new Field(e.getKey(), stream));
-    }
-
-    return this;
-  }
-
-  /**
-   * Get a stream of categories which includes the parents, according to
-   * policies defined in indexing parameters.
-   *
-   * @param categoryAttributesStream
-   *            The input stream
-   * @return The parents stream.
-   * @see OrdinalPolicy OrdinalPolicy (for policy of adding category tokens for parents)
-   * @see PathPolicy PathPolicy (for policy of adding category <b>list</b> tokens for parents)
-   */
-  protected TokenStream getParentsStream(
-      CategoryAttributesStream categoryAttributesStream) {
-    return new CategoryParentsStream(categoryAttributesStream,
-        taxonomyWriter, indexingParams);
-  }
-
-  /**
-   * Fills the categories mapping between a field name and a list of
-   * categories that belongs to it according to this builder's
-   * {@link FacetIndexingParams} object. Any previous content of
-   * {@link #categoriesMap} is discarded first.
-   *
-   * @param categories
-   *            Iterable over the category attributes
-   * @throws IOException
-   *             declared by the signature so that overriding
-   *             implementations may throw it
-   */
-  protected void fillCategoriesMap(Iterable<CategoryAttribute> categories)
-      throws IOException {
-    categoriesMap.clear();
-
-    // for-each category
-    for (CategoryAttribute category : categories) {
-      // extracting the field-name to which this category belongs
-      String fieldName = indexingParams.getCategoryListParams(
-          category.getCategoryPath()).getTerm().field();
-
-      // getting the list of categories which belongs to that field
-      List<CategoryAttribute> list = categoriesMap.get(fieldName);
-
-      // if no such list exists
-      if (list == null) {
-        // adding a new one to the map
-        list = new ArrayList<CategoryAttribute>();
-        categoriesMap.put(fieldName, list);
-      }
-
-      // adding a copy of the category to the list (presumably because the
-      // iterable may reuse a single attribute instance - TODO confirm)
-      list.add(category.clone());
-    }
-  }
-
-  /**
-   * Get a category list tokenizer (or a series of such tokenizers) to create
-   * the <b>category list tokens</b>. This implementation delegates to
-   * {@link #getCountingListTokenizer(TokenStream)}.
-   *
-   * @param categoryStream
-   *            A stream containing {@link CategoryAttribute} with the
-   *            relevant data.
-   * @return The category list tokenizer (or series of tokenizers) to be used
-   *         in creating category list tokens.
-   */
-  protected CategoryListTokenizer getCategoryListTokenizer(
-      TokenStream categoryStream) {
-    return getCountingListTokenizer(categoryStream);
-  }
-
-  /**
-   * Get a {@link CountingListTokenizer} for creating counting list token.
-   *
-   * @param categoryStream
-   *            A stream containing {@link CategoryAttribute}s with the
-   *            relevant data.
-   * @return A counting list tokenizer to be used in creating counting list
-   *         token.
-   */
-  protected CountingListTokenizer getCountingListTokenizer(
-      TokenStream categoryStream) {
-    return new CountingListTokenizer(categoryStream, indexingParams);
-  }
-
-  /**
-   * Get a {@link CategoryTokenizer} to create the <b>category tokens</b>.
-   * This method can be overridden for adding more attributes to the category
-   * tokens.
-   *
-   * @param categoryStream
-   *            A stream containing {@link CategoryAttribute} with the
-   *            relevant data.
-   * @return The {@link CategoryTokenizer} to be used in creating category
-   *         tokens.
-   * @throws IOException
-   *             propagated from the {@link CategoryTokenizer} constructor,
-   *             if it throws
-   */
-  protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
-      throws IOException {
-    return new CategoryTokenizer(categoryStream, indexingParams);
-  }
-
-  /**
-   * Adds the fields created in one of the "set" methods to the document.
-   * Norms are omitted on every field before it is added.
-   *
-   * @param doc
-   *            the document to which the prepared fields are added
-   * @return the same {@code doc} instance that was passed in
-   */
-  public Document build(Document doc) {
-    for (Field f : fieldList) {
-      f.setOmitNorms(true);
-      doc.add(f);
-    }
-    return doc;
-  }
-
-}