pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / facet / src / java / org / apache / lucene / facet / index / FacetsPayloadProcessorProvider.java
diff --git a/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java

new file mode 100644 (file)

index 0000000..c5a054e
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
@@ -0,0 +1,187 @@
+package org.apache.lucene.facet.index;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.PayloadProcessorProvider;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
+import org.apache.lucene.util.encoding.IntDecoder;
+import org.apache.lucene.util.encoding.IntEncoder;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link PayloadProcessorProvider} for updating facets ordinal references,
+ * based on an ordinal map. You should use this code in conjunction with merging
+ * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
+ * which maps the 'old' ordinals to the 'new' ones. You can use that map to
+ * re-map the payloads which contain the facets information (ordinals) either
+ * before or while merging the indexes.
+ * <p>
+ * For re-mapping the ordinals before you merge the indexes, do the following:
+ * 
+ * <pre>
+ * // merge the old taxonomy with the new one.
+ * OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
+ * int[] ordmap = map.getMap();
+ * 
+ * // re-map the ordinals on the old directory.
+ * Directory oldDir;
+ * FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
+ *     oldDir, ordmap, indexingParams);
+ * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
+ * conf.setMergePolicy(new ForceOptimizeMergePolicy());
+ * IndexWriter writer = new IndexWriter(oldDir, conf);
+ * writer.setPayloadProcessorProvider(fppp);
+ * writer.forceMerge(1);
+ * writer.close();
+ * 
+ * // merge that directory with the new index.
+ * IndexWriter newWriter; // opened on the 'new' Directory
+ * newWriter.addIndexes(oldDir);
+ * newWriter.commit();
+ * </pre>
+ * 
+ * For re-mapping the ordinals during index merge, do the following:
+ * 
+ * <pre>
+ * // merge the old taxonomy with the new one.
+ * OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
+ * int[] ordmap = map.getMap();
+ * 
+ * // Add the index and re-map ordinals on the go
+ * IndexReader r = IndexReader.open(oldDir);
+ * IndexWriter writer = new IndexWriter(newDir, new IndexWriterConfig(VER, ANALYZER));
+ * writer.setPayloadProcessorProvider(
+ *     new FacetsPayloadProcessorProvider(oldDir, ordmap, indexingParams));
+ * writer.addIndexes(r);
+ * writer.commit();
+ * </pre>
+ * <p>
+ * <b>NOTE:</b> while the second example looks simpler, IndexWriter may trigger
+ * a long merge due to addIndexes. The first example avoids this possibly
+ * unneeded merge, and its re-mapping step can be done separately (e.g. on
+ * another node) before the index is merged.
+ * 
+ * @lucene.experimental
+ */
+public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
+
+  /** The only directory for which this provider hands out a processor. */
+  private final Directory workDir;
+
+  /** Processor returned for {@link #workDir}; created once in the constructor. */
+  private final DirPayloadProcessor dirProcessor;
+
+  /**
+   * Creates a provider that re-maps facet ordinals for the given directory.
+   *
+   * @param dir the {@link Directory} containing the segments to update
+   * @param ordinalMap an array mapping previous facets ordinals to new ones
+   * @param indexingParams the facets indexing parameters
+   */
+  public FacetsPayloadProcessorProvider(Directory dir, int[] ordinalMap,
+                                        FacetIndexingParams indexingParams) {
+    this.workDir = dir;
+    this.dirProcessor = new FacetsDirPayloadProcessor(indexingParams, ordinalMap);
+  }
+
+  /** Returns the facets processor when {@code dir} is the configured directory, else null. */
+  @Override
+  public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
+    // Reference comparison: only the very Directory instance given at construction matches.
+    return workDir == dir ? dirProcessor : null;
+  }
+
+  /** Hands out a {@link FacetsPayloadProcessor} for terms registered from the indexing params. */
+  public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {
+
+    /** Category list parameters, keyed by the term each one reports via getTerm(). */
+    private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
+
+    private final int[] ordinalMap;
+
+    /**
+     * Registers every category list of the indexing parameters under its term.
+     *
+     * @param indexingParams the facets indexing parameters
+     * @param ordinalMap an array mapping previous facets ordinals to new ones
+     */
+    protected FacetsDirPayloadProcessor(FacetIndexingParams indexingParams, int[] ordinalMap) {
+      this.ordinalMap = ordinalMap;
+      for (CategoryListParams clp : indexingParams.getAllCategoryListParams()) {
+        termMap.put(clp.getTerm(), clp);
+      }
+    }
+
+    /** Returns a new processor when {@code term} is a registered term, else null. */
+    @Override
+    public PayloadProcessor getProcessor(Term term) throws IOException {
+      CategoryListParams clp = termMap.get(term);
+      return clp == null ? null : new FacetsPayloadProcessor(clp, ordinalMap);
+    }
+  }
+
+  /** A PayloadProcessor for updating facets ordinal references, based on an ordinal map */
+  public static class FacetsPayloadProcessor extends PayloadProcessor {
+
+    private final IntEncoder encoder;
+    private final IntDecoder decoder;
+    private final int[] ordinalMap;
+    /** Output buffer; reset and handed to the encoder on every processPayload call. */
+    private final ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+    /**
+     * @param params defines the encoding of facet ordinals as payload
+     * @param ordinalMap an array mapping previous facets ordinals to new ones
+     */
+    protected FacetsPayloadProcessor(CategoryListParams params, int[] ordinalMap) {
+      this.encoder = params.createEncoder();
+      this.decoder = this.encoder.createMatchingDecoder();
+      this.ordinalMap = ordinalMap;
+    }
+
+    /** Returns the current size of the internal output buffer. */
+    @Override
+    public int payloadLength() throws IOException {
+      return os.size();
+    }
+
+    /** Decodes each ordinal in the payload slice, maps it through ordinalMap and re-encodes it. */
+    @Override
+    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
+      decoder.reInit(new ByteArrayInputStream(payload, start, length));
+      os.reset();
+      encoder.reInit(os);
+      for (long old = decoder.decode(); old != IntDecoder.EOS; old = decoder.decode()) {
+        encoder.encode(ordinalMap[(int) old]);
+      }
+      encoder.close();
+      return os.toByteArray();
+    }
+  }
+}