// Recovered from a git blobdiff (new file): https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/PrefixCodedTerms.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.store.RAMInputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

/**
 * Prefix codes term instances (prefixes are shared).
 *
 * <p>Each term is stored in the buffer as:
 * <ol>
 *   <li>a VInt {@code code}: the shared-prefix length shifted left by one,
 *       with the low bit set when the field differs from the previous
 *       term's field;</li>
 *   <li>the field name as a string, only when that low bit is set;</li>
 *   <li>a VInt with the suffix length;</li>
 *   <li>the suffix bytes.</li>
 * </ol>
 * The prefix is always measured against the bytes of the previously added
 * term, including when the field changes.
 *
 * @lucene.experimental
 */
class PrefixCodedTerms implements Iterable<Term> {
  final RAMFile buffer;

  private PrefixCodedTerms(RAMFile buffer) {
    this.buffer = buffer;
  }

  /** @return size in bytes */
  public long getSizeInBytes() {
    return buffer.getSizeInBytes();
  }

  /**
   * Each call opens a fresh input over the buffer, so every iterator starts
   * from the first term.
   *
   * @return iterator over the terms
   */
  public Iterator<Term> iterator() {
    return new PrefixCodedTermsIterator();
  }

  /**
   * Decodes the terms one by one.
   *
   * <p>NOTE: a single {@link Term} instance is reused; {@link #next()}
   * overwrites it and returns the same object every time. Callers that need
   * to keep a term beyond the next call must copy it.
   */
  class PrefixCodedTermsIterator implements Iterator<Term> {
    final IndexInput input;
    String field = "";
    BytesRef bytes = new BytesRef();
    Term term = new Term(field, "");

    PrefixCodedTermsIterator() {
      try {
        input = new RAMInputStream("PrefixCodedTermsIterator", buffer);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /** @return true while unread bytes remain in the input */
    public boolean hasNext() {
      return input.getFilePointer() < input.length();
    }

    /**
     * Decodes the next term into the shared {@link Term} instance.
     *
     * @return the shared term instance, updated to the next term
     * @throws NoSuchElementException if the input is exhausted
     * @throws RuntimeException wrapping any {@link IOException} from the input
     */
    public Term next() {
      // Explicit check rather than an assert, so the Iterator contract also
      // holds when assertions are disabled.
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      try {
        int code = input.readVInt();
        if ((code & 1) != 0) {
          // new field
          field = StringHelper.intern(input.readString());
        }
        int prefix = code >>> 1;
        int suffix = input.readVInt();
        // The first `prefix` bytes are carried over from the previous term;
        // only the suffix is read, directly behind them. This relies on
        // grow() keeping the bytes already in the array.
        bytes.grow(prefix + suffix);
        input.readBytes(bytes.bytes, prefix, suffix);
        bytes.length = prefix + suffix;
        term.set(field, bytes.utf8ToString());
        return term;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /** Not supported. */
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * Builds a PrefixCodedTerms: call add repeatedly, then finish.
   *
   * <p>Terms are expected in increasing order; this is checked by an
   * assertion only.
   */
  public static class Builder {
    private final RAMFile buffer = new RAMFile();
    private final RAMOutputStream output = new RAMOutputStream(buffer);
    private final Term lastTerm = new Term("");
    private final BytesRef lastBytes = new BytesRef();
    private final BytesRef scratch = new BytesRef();

    /**
     * add a term
     *
     * @param term the term to append; must compare greater than the
     *        previously added term (asserted)
     * @throws RuntimeException wrapping any {@link IOException} from the output
     */
    public void add(Term term) {
      assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;

      scratch.copy(term.text);
      try {
        int prefix = sharedPrefix(lastBytes, scratch);
        int suffix = scratch.length - prefix;
        if (term.field.equals(lastTerm.field)) {
          // same field: low bit clear, no field name written
          output.writeVInt(prefix << 1);
        } else {
          // field changed: low bit set, followed by the new field name
          output.writeVInt((prefix << 1) | 1);
          output.writeString(term.field);
        }
        output.writeVInt(suffix);
        output.writeBytes(scratch.bytes, scratch.offset + prefix, suffix);
        // remember this term so the next add() can share its prefix
        lastBytes.copy(scratch);
        lastTerm.text = term.text;
        lastTerm.field = term.field;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * Closes the output and wraps the written buffer.
     *
     * @return finalized form
     * @throws RuntimeException wrapping any {@link IOException} from closing
     */
    public PrefixCodedTerms finish() {
      try {
        output.close();
        return new PrefixCodedTerms(buffer);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * @return the number of leading bytes that are equal in both refs, at
     *         most the shorter of the two lengths
     */
    private static int sharedPrefix(BytesRef term1, BytesRef term2) {
      final int limit = Math.min(term1.length, term2.length);
      for (int i = 0; i < limit; i++) {
        if (term1.bytes[term1.offset + i] != term2.bytes[term2.offset + i]) {
          return i;
        }
      }
      return limit;
    }
  }
}