1 package org.apache.lucene.facet.index;
3 import java.io.ByteArrayInputStream;
4 import java.io.ByteArrayOutputStream;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.util.HashMap;
10 import org.apache.lucene.index.PayloadProcessorProvider;
11 import org.apache.lucene.index.Term;
12 import org.apache.lucene.store.Directory;
14 import org.apache.lucene.facet.index.params.CategoryListParams;
15 import org.apache.lucene.facet.index.params.FacetIndexingParams;
16 import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter.OrdinalMap;
17 import org.apache.lucene.util.encoding.IntDecoder;
18 import org.apache.lucene.util.encoding.IntEncoder;
21 * Licensed to the Apache Software Foundation (ASF) under one or more
22 * contributor license agreements. See the NOTICE file distributed with
23 * this work for additional information regarding copyright ownership.
24 * The ASF licenses this file to You under the Apache License, Version 2.0
25 * (the "License"); you may not use this file except in compliance with
26 * the License. You may obtain a copy of the License at
28 * http://www.apache.org/licenses/LICENSE-2.0
30 * Unless required by applicable law or agreed to in writing, software
31 * distributed under the License is distributed on an "AS IS" BASIS,
32 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33 * See the License for the specific language governing permissions and
34 * limitations under the License.
38 * A {@link PayloadProcessorProvider} for updating facets ordinal references,
39 * based on an ordinal map. You should use this code in conjunction with merging
40 * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
41 * which maps the 'old' ordinals to the 'new' ones. You can use that map to
42 * re-map the payloads which contain the facets information (ordinals) either
43 * before or while merging the indexes.
45 * For re-mapping the ordinals before you merge the indexes, do the following:
48 * // merge the old taxonomy with the new one.
49 * OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
50 * int[] ordmap = map.getMap();
52 * // re-map the ordinals on the old directory.
54 * FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
56 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
57 * conf.setMergePolicy(new ForceOptimizeMergePolicy());
58 * IndexWriter writer = new IndexWriter(oldDir, conf);
59 * writer.setPayloadProcessorProvider(fppp);
63 * // merge that directory with the new index.
64 * IndexWriter newWriter; // opened on the 'new' Directory
65 * newWriter.addIndexes(oldDir);
69 * For re-mapping the ordinals during index merge, do the following:
72 * // merge the old taxonomy with the new one.
73 * OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
74 * int[] ordmap = map.getMap();
76 * // Add the index and re-map ordinals on the go (fppp is created as in the first example)
77 * IndexReader r = IndexReader.open(oldDir);
78 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
79 * IndexWriter writer = new IndexWriter(newDir, conf);
80 * writer.setPayloadProcessorProvider(fppp);
81 * writer.addIndexes(r);
85 * <b>NOTE:</b> while the second example looks simpler, IndexWriter may trigger
86 * a long merge due to addIndexes. The first example avoids this possibly
87 * unneeded merge, and it can also be done separately (e.g. on another node)
88 * before the index is merged.
90 * @lucene.experimental
92 public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
94 private final Directory workDir;
96 private final DirPayloadProcessor dirProcessor;
99 * Construct FacetsPayloadProcessorProvider with FacetIndexingParams
101 * @param dir the {@link Directory} containing the segments to update
102 * @param ordinalMap an array mapping previous facets ordinals to new ones
103 * @param indexingParams the facets indexing parameters
105 public FacetsPayloadProcessorProvider(Directory dir, int[] ordinalMap,
106 FacetIndexingParams indexingParams) {
108 dirProcessor = new FacetsDirPayloadProcessor(indexingParams, ordinalMap);
112 public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
113 if (workDir != dir) {
119 public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {
121 private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
123 private final int[] ordinalMap;
126 * Construct FacetsDirPayloadProcessor with custom FacetIndexingParams
127 * @param ordinalMap an array mapping previous facets ordinals to new ones
128 * @param indexingParams the facets indexing parameters
130 protected FacetsDirPayloadProcessor(FacetIndexingParams indexingParams, int[] ordinalMap) {
131 this.ordinalMap = ordinalMap;
132 for (CategoryListParams params: indexingParams.getAllCategoryListParams()) {
133 termMap.put(params.getTerm(), params);
138 public PayloadProcessor getProcessor(Term term) throws IOException {
139 CategoryListParams params = termMap.get(term);
140 if (params == null) {
143 return new FacetsPayloadProcessor(params, ordinalMap);
148 /** A PayloadProcessor for updating facets ordinal references, based on an ordinal map */
149 public static class FacetsPayloadProcessor extends PayloadProcessor {
151 private final IntEncoder encoder;
152 private final IntDecoder decoder;
153 private final int[] ordinalMap;
154 private final ByteArrayOutputStream os = new ByteArrayOutputStream();
157 * @param params defines the encoding of facet ordinals as payload
158 * @param ordinalMap an array mapping previous facets ordinals to new ones
160 protected FacetsPayloadProcessor(CategoryListParams params, int[] ordinalMap) {
161 encoder = params.createEncoder();
162 decoder = encoder.createMatchingDecoder();
163 this.ordinalMap = ordinalMap;
167 public int payloadLength() throws IOException {
172 public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
173 InputStream is = new ByteArrayInputStream(payload, start, length);
178 while ((ordinal = decoder.decode()) != IntDecoder.EOS) {
179 int newOrdinal = ordinalMap[(int)ordinal];
180 encoder.encode(newOrdinal);
183 return os.toByteArray();