1 package org.apache.lucene.facet.index.streaming;
3 import java.io.IOException;
4 import java.util.HashMap;
5 import java.util.Iterator;
6 import java.util.Map.Entry;
8 import org.apache.lucene.analysis.TokenStream;
10 import org.apache.lucene.facet.index.CategoryListPayloadStream;
11 import org.apache.lucene.facet.index.attributes.OrdinalProperty;
12 import org.apache.lucene.facet.index.params.CategoryListParams;
13 import org.apache.lucene.facet.index.params.FacetIndexingParams;
14 import org.apache.lucene.facet.taxonomy.CategoryPath;
15 import org.apache.lucene.facet.util.PartitionsUtils;
16 import org.apache.lucene.util.encoding.IntEncoder;
19 * Licensed to the Apache Software Foundation (ASF) under one or more
20 * contributor license agreements. See the NOTICE file distributed with
21 * this work for additional information regarding copyright ownership.
22 * The ASF licenses this file to You under the Apache License, Version 2.0
23 * (the "License"); you may not use this file except in compliance with
24 * the License. You may obtain a copy of the License at
26 * http://www.apache.org/licenses/LICENSE-2.0
28 * Unless required by applicable law or agreed to in writing, software
29 * distributed under the License is distributed on an "AS IS" BASIS,
30 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31 * See the License for the specific language governing permissions and
32 * limitations under the License.
36 * {@link CategoryListTokenizer} for facet counting
38 * @lucene.experimental
40 public class CountingListTokenizer extends CategoryListTokenizer {
42 /** A table for retrieving payload streams by category-list name. */
43 protected HashMap<String, CategoryListPayloadStream> payloadStreamsByName =
44 new HashMap<String, CategoryListPayloadStream>();
46 /** An iterator over the payload streams */
47 protected Iterator<Entry<String, CategoryListPayloadStream>> payloadStreamIterator;
49 public CountingListTokenizer(TokenStream input,
50 FacetIndexingParams indexingParams) {
51 super(input, indexingParams);
52 this.payloadStreamsByName = new HashMap<String, CategoryListPayloadStream>();
56 protected void handleStartOfInput() throws IOException {
57 payloadStreamsByName.clear();
58 payloadStreamIterator = null;
62 public final boolean incrementToken() throws IOException {
63 if (input.incrementToken()) {
64 if (this.categoryAttribute != null) {
65 OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute
66 .getProperty(OrdinalProperty.class);
67 if (ordinalProperty != null && legalCategory()) {
68 CategoryPath categoryPath = this.categoryAttribute
70 int ordinal = ordinalProperty.getOrdinal();
71 CategoryListPayloadStream payloadStream = getPayloadStream(
72 categoryPath, ordinal);
73 int partitionSize = indexingParams.getPartitionSize();
74 payloadStream.appendIntToStream(ordinal % partitionSize);
79 if (this.payloadStreamIterator == null) {
80 this.handleEndOfInput();
81 this.payloadStreamIterator = this.payloadStreamsByName.entrySet()
84 if (this.payloadStreamIterator.hasNext()) {
85 Entry<String, CategoryListPayloadStream> entry = this.payloadStreamIterator
87 String countingListName = entry.getKey();
88 int length = countingListName.length();
89 this.termAttribute.resizeBuffer(length);
90 countingListName.getChars(0, length, termAttribute.buffer(), 0);
91 this.termAttribute.setLength(length);
92 CategoryListPayloadStream payloadStream = entry.getValue();
93 payload.setData(payloadStream.convertStreamToByteArray());
94 this.payloadAttribute.setPayload(payload);
101 * A method which allows extending classes to filter the categories going
102 * into the counting list.
104 * @return By default returns {@code true}, meaning the current category is
105 * to be part of the counting list. For categories that should be
106 * filtered, return {@code false}.
108 protected boolean legalCategory() {
112 protected CategoryListPayloadStream getPayloadStream(
113 CategoryPath categoryPath, int ordinal) throws IOException {
114 CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath);
115 String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal);
116 CategoryListPayloadStream fps = payloadStreamsByName.get(name);
118 IntEncoder encoder = clParams.createEncoder();
119 fps = new CategoryListPayloadStream(encoder);
120 payloadStreamsByName.put(name, fps);