1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.Iterator;
23 import org.apache.lucene.store.IndexInput;
24 import org.apache.lucene.store.RAMFile;
25 import org.apache.lucene.store.RAMInputStream;
26 import org.apache.lucene.store.RAMOutputStream;
27 import org.apache.lucene.util.BytesRef;
28 import org.apache.lucene.util.StringHelper;
31 * Prefix codes term instances (prefixes are shared)
32 * @lucene.experimental
34 class PrefixCodedTerms implements Iterable<Term> {
37 private PrefixCodedTerms(RAMFile buffer) {
41 /** @return size in bytes */
42 public long getSizeInBytes() {
43 return buffer.getSizeInBytes();
46 /** @return iterator over the bytes */
47 public Iterator<Term> iterator() {
48 return new PrefixCodedTermsIterator();
51 class PrefixCodedTermsIterator implements Iterator<Term> {
52 final IndexInput input;
54 BytesRef bytes = new BytesRef();
55 Term term = new Term(field, "");
57 PrefixCodedTermsIterator() {
59 input = new RAMInputStream("PrefixCodedTermsIterator", buffer);
60 } catch (IOException e) {
61 throw new RuntimeException(e);
65 public boolean hasNext() {
66 return input.getFilePointer() < input.length();
72 int code = input.readVInt();
73 if ((code & 1) != 0) {
75 field = StringHelper.intern(input.readString());
77 int prefix = code >>> 1;
78 int suffix = input.readVInt();
79 bytes.grow(prefix + suffix);
80 input.readBytes(bytes.bytes, prefix, suffix);
81 bytes.length = prefix + suffix;
82 term.set(field, bytes.utf8ToString());
84 } catch (IOException e) {
85 throw new RuntimeException(e);
89 public void remove() {
90 throw new UnsupportedOperationException();
94 /** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
95 public static class Builder {
96 private RAMFile buffer = new RAMFile();
97 private RAMOutputStream output = new RAMOutputStream(buffer);
98 private Term lastTerm = new Term("");
99 private BytesRef lastBytes = new BytesRef();
100 private BytesRef scratch = new BytesRef();
103 public void add(Term term) {
104 assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
106 scratch.copy(term.text);
108 int prefix = sharedPrefix(lastBytes, scratch);
109 int suffix = scratch.length - prefix;
110 if (term.field.equals(lastTerm.field)) {
111 output.writeVInt(prefix << 1);
113 output.writeVInt(prefix << 1 | 1);
114 output.writeString(term.field);
116 output.writeVInt(suffix);
117 output.writeBytes(scratch.bytes, scratch.offset + prefix, suffix);
118 lastBytes.copy(scratch);
119 lastTerm.text = term.text;
120 lastTerm.field = term.field;
121 } catch (IOException e) {
122 throw new RuntimeException(e);
126 /** return finalized form */
127 public PrefixCodedTerms finish() {
130 return new PrefixCodedTerms(buffer);
131 } catch (IOException e) {
132 throw new RuntimeException(e);
136 private int sharedPrefix(BytesRef term1, BytesRef term2) {
138 int pos1End = pos1 + Math.min(term1.length, term2.length);
140 while(pos1 < pos1End) {
141 if (term1.bytes[term1.offset + pos1] != term2.bytes[term2.offset + pos2]) {