1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.Iterator;
23 import org.apache.lucene.index.Term;
24 import org.apache.lucene.store.IndexInput;
25 import org.apache.lucene.store.RAMFile;
26 import org.apache.lucene.store.RAMInputStream;
27 import org.apache.lucene.store.RAMOutputStream;
28 import org.apache.lucene.util.BytesRef;
29 import org.apache.lucene.util.StringHelper;
32 * Prefix codes term instances (prefixes are shared)
33 * @lucene.experimental
35 class PrefixCodedTerms implements Iterable<Term> {
38 private PrefixCodedTerms(RAMFile buffer) {
42 /** @return size in bytes */
43 public long getSizeInBytes() {
44 return buffer.getSizeInBytes();
47 /** @return iterator over the bytes */
48 public Iterator<Term> iterator() {
49 return new PrefixCodedTermsIterator();
52 class PrefixCodedTermsIterator implements Iterator<Term> {
53 final IndexInput input;
55 BytesRef bytes = new BytesRef();
56 Term term = new Term(field, "");
/**
 * Opens a fresh input over the enclosing instance's buffer.
 *
 * @throws RuntimeException wrapping the IOException if the input cannot be created
 */
PrefixCodedTermsIterator() {
  try {
    input = new RAMInputStream(buffer);
  } catch (IOException ioe) {
    // iterator() declares no checked exceptions, so the failure is rethrown unchecked
    // with the original exception preserved as the cause.
    throw new RuntimeException(ioe);
  }
}
66 public boolean hasNext() {
67 return input.getFilePointer() < input.length();
73 int code = input.readVInt();
74 if ((code & 1) != 0) {
76 field = StringHelper.intern(input.readString());
78 int prefix = code >>> 1;
79 int suffix = input.readVInt();
80 bytes.grow(prefix + suffix);
81 input.readBytes(bytes.bytes, prefix, suffix);
82 bytes.length = prefix + suffix;
83 term.set(field, bytes.utf8ToString());
85 } catch (IOException e) {
86 throw new RuntimeException(e);
90 public void remove() {
91 throw new UnsupportedOperationException();
95 /** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
96 public static class Builder {
97 private RAMFile buffer = new RAMFile();
98 private RAMOutputStream output = new RAMOutputStream(buffer);
99 private Term lastTerm = new Term("");
100 private BytesRef lastBytes = new BytesRef();
101 private BytesRef scratch = new BytesRef();
104 public void add(Term term) {
105 assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
107 scratch.copy(term.text);
109 int prefix = sharedPrefix(lastBytes, scratch);
110 int suffix = scratch.length - prefix;
111 if (term.field.equals(lastTerm.field)) {
112 output.writeVInt(prefix << 1);
114 output.writeVInt(prefix << 1 | 1);
115 output.writeString(term.field);
117 output.writeVInt(suffix);
118 output.writeBytes(scratch.bytes, scratch.offset + prefix, suffix);
119 lastBytes.copy(scratch);
120 lastTerm.text = term.text;
121 lastTerm.field = term.field;
122 } catch (IOException e) {
123 throw new RuntimeException(e);
127 /** return finalized form */
128 public PrefixCodedTerms finish() {
131 return new PrefixCodedTerms(buffer);
132 } catch (IOException e) {
133 throw new RuntimeException(e);
137 private int sharedPrefix(BytesRef term1, BytesRef term2) {
139 int pos1End = pos1 + Math.min(term1.length, term2.length);
141 while(pos1 < pos1End) {
142 if (term1.bytes[term1.offset + pos1] != term2.bytes[term2.offset + pos2]) {