pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / src / java / org / apache / lucene / index / PrefixCodedTerms.java
diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/PrefixCodedTerms.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/PrefixCodedTerms.java

new file mode 100644 (file)

index 0000000..ac53954
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/PrefixCodedTerms.java
@@ -0,0 +1,193 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RAMFile;
+import org.apache.lucene.store.RAMInputStream;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * Prefix codes term instances (prefixes are shared).
+ * <p>
+ * Terms are appended in sorted order through a {@link Builder} and stored in a
+ * {@link RAMFile}. Each entry is written as a VInt holding the length of the
+ * byte prefix shared with the previous term shifted left by one (the low bit
+ * is set when the field changes, in which case the field name follows), a VInt
+ * holding the suffix length, and finally the suffix bytes.
+ * @lucene.experimental
+ */
+class PrefixCodedTerms implements Iterable<Term> {
+  /** Backing storage holding the encoded terms. */
+  final RAMFile buffer;
+
+  private PrefixCodedTerms(RAMFile buffer) {
+    this.buffer = buffer;
+  }
+
+  /** @return size in bytes of the backing buffer */
+  public long getSizeInBytes() {
+    return buffer.getSizeInBytes();
+  }
+
+  /**
+   * Creates a new iterator that decodes the terms from the start of the buffer.
+   * The iterator reuses a single {@link Term} instance; see
+   * {@link PrefixCodedTermsIterator#next()}.
+   *
+   * @return iterator over the terms
+   */
+  public Iterator<Term> iterator() {
+    return new PrefixCodedTermsIterator();
+  }
+
+  /** Decodes the terms of the enclosing instance one at a time. */
+  class PrefixCodedTermsIterator implements Iterator<Term> {
+    final IndexInput input;
+    // field of the most recently decoded term; only re-read when it changes
+    String field = "";
+    // bytes of the most recently decoded term; the shared prefix is kept in place
+    BytesRef bytes = new BytesRef();
+    // single instance handed out by every call to next()
+    Term term = new Term(field, "");
+
+    PrefixCodedTermsIterator() {
+      try {
+        input = new RAMInputStream("PrefixCodedTermsIterator", buffer);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /** @return true while the read position is before the end of the buffer */
+    public boolean hasNext() {
+      return input.getFilePointer() < input.length();
+    }
+
+    /**
+     * Decodes the next term.
+     * <p>
+     * The returned {@link Term} is the same instance on every call and is
+     * overwritten by the following call; callers that need to keep a term
+     * must copy it.
+     *
+     * @return the next term
+     * @throws NoSuchElementException if there are no more terms
+     * @throws RuntimeException wrapping any {@link IOException} raised while reading
+     */
+    public Term next() {
+      // Honor the Iterator contract even when assertions are disabled.
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      try {
+        int code = input.readVInt();
+        if ((code & 1) != 0) {
+          // new field
+          field = StringHelper.intern(input.readString());
+        }
+        int prefix = code >>> 1;
+        int suffix = input.readVInt();
+        bytes.grow(prefix + suffix);
+        // relies on the first 'prefix' bytes of the previous term being retained
+        input.readBytes(bytes.bytes, prefix, suffix);
+        bytes.length = prefix + suffix;
+        term.set(field, bytes.utf8ToString());
+        return term;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /** Not supported. */
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
+  public static class Builder {
+    private final RAMFile buffer = new RAMFile();
+    private final RAMOutputStream output = new RAMOutputStream(buffer);
+    private final Term lastTerm = new Term("");
+    private final BytesRef lastBytes = new BytesRef();
+    private final BytesRef scratch = new BytesRef();
+
+    /**
+     * Appends a term. Terms must be added in increasing order; this is only
+     * checked when assertions are enabled.
+     *
+     * @param term the term to add
+     * @throws RuntimeException wrapping any {@link IOException} raised while writing
+     */
+    public void add(Term term) {
+      assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
+
+      scratch.copy(term.text);
+      try {
+        int prefix = sharedPrefix(lastBytes, scratch);
+        int suffix = scratch.length - prefix;
+        if (term.field.equals(lastTerm.field)) {
+          output.writeVInt(prefix << 1);
+        } else {
+          // low bit flags that a field name follows
+          output.writeVInt((prefix << 1) | 1);
+          output.writeString(term.field);
+        }
+        output.writeVInt(suffix);
+        output.writeBytes(scratch.bytes, scratch.offset + prefix, suffix);
+        lastBytes.copy(scratch);
+        lastTerm.text = term.text;
+        lastTerm.field = term.field;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /**
+     * Closes the output and wraps the written buffer.
+     *
+     * @return finalized form
+     */
+    public PrefixCodedTerms finish() {
+      try {
+        output.close();
+        return new PrefixCodedTerms(buffer);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /** @return number of leading bytes that term1 and term2 have in common */
+    private int sharedPrefix(BytesRef term1, BytesRef term2) {
+      final int limit = Math.min(term1.length, term2.length);
+      for (int i = 0; i < limit; i++) {
+        if (term1.bytes[term1.offset + i] != term2.bytes[term2.offset + i]) {
+          return i;
+        }
+      }
+      return limit;
+    }
+  }
+}