1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
22 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
23 import org.apache.lucene.document.Fieldable;
24 import org.apache.lucene.index.FieldInfo.IndexOptions;
25 import org.apache.lucene.util.RamUsageEstimator;
27 // TODO: break into separate freq and prox writers as
28 // codecs; make separate container (tii/tis/skip/*) that can
29 // be configured as any number of files 1..N
// Per-field postings consumer. newTerm/addTerm buffer doc codes and
// per-document term frequencies through termsHashPerField.writeVInt(0, ...),
// and writeProx buffers position codes and payload bytes through
// termsHashPerField.writeVInt(1, ...)/writeBytes(1, ...).
// Instances compare by fieldInfo.name (see compareTo).
30 final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
// Per-thread writer handed to the constructor.
32 final FreqProxTermsWriterPerThread perThread;
// Source of postingsArray and of the writeVInt/writeBytes calls used below.
33 final TermsHashPerField termsHashPerField;
// Field metadata; its name drives compareTo and its indexOptions drive the format.
34 final FieldInfo fieldInfo;
// docState and fieldState are both read from termsHashPerField in the constructor.
35 final DocumentsWriter.DocState docState;
36 final FieldInvertState fieldState;
// Copy of fieldInfo.indexOptions; newTerm/addTerm branch on it.
37 IndexOptions indexOptions;
// Assigned in start(Fieldable); null means writeProx has no payload source.
38 PayloadAttribute payloadAttribute;
// Stores the three collaborators, then caches docState and fieldState from
// termsHashPerField and takes the initial indexOptions from fieldInfo.
40 public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
41 this.termsHashPerField = termsHashPerField;
42 this.perThread = perThread;
43 this.fieldInfo = fieldInfo;
44 docState = termsHashPerField.docState;
45 fieldState = termsHashPerField.fieldState;
46 indexOptions = fieldInfo.indexOptions;
// Chooses the stream count by testing whether the field indexes positions
// (fieldInfo.indexOptions == DOCS_AND_FREQS_AND_POSITIONS or not).
// NOTE(review): the statements that return the actual counts are not
// visible in this excerpt -- confirm the values against the full file.
50 int getStreamCount() {
51 if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
// No-op: the body is empty, so this consumer takes no action here.
63 void skippingLongTerm() throws IOException {}
// Orders writers by field name: delegates to fieldInfo.name.compareTo
// with the other writer's fieldInfo.name. other must be non-null.
65 public int compareTo(FreqProxTermsWriterPerField other) {
66 return fieldInfo.name.compareTo(other.fieldInfo.name);
70 // Record, up front, whether our in-RAM format will be
71 // with or without term freqs:
72 indexOptions = fieldInfo.indexOptions;
73 payloadAttribute = null;
// Scans the first count entries of fields, testing each with isIndexed().
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the result reports whether any entry is indexed -- confirm.
77 boolean start(Fieldable[] fields, int count) {
78 for(int i=0;i<count;i++)
79 if (fields[i].isIndexed())
// Refreshes payloadAttribute from fieldState.attributeSource: when the source
// has a PayloadAttribute it is fetched and cached for writeProx to read.
// The parameter f is not used by the visible statements.
85 void start(Fieldable f) {
86 if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
87 payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
// NOTE(review): the line introducing this alternative branch is not visible
// here; presumably this runs when the attribute is absent -- confirm.
89 payloadAttribute = null;
// Appends one position entry for termID via the writeVInt(1, ...) calls.
// proxCode is shifted left by one bit; the low bit is set only when a
// non-empty payload follows, in which case the payload length and bytes
// are written right after the code.
// NOTE(review): some branch lines (between 95 and 98, and between 104 and
// 107) are not visible in this excerpt.
93 void writeProx(final int termID, int proxCode) {
94 final Payload payload;
95 if (payloadAttribute == null) {
98 payload = payloadAttribute.getPayload();
// Payload present and non-empty: flag it in the low bit of the code.
101 if (payload != null && payload.length > 0) {
102 termsHashPerField.writeVInt(1, (proxCode<<1)|1);
103 termsHashPerField.writeVInt(1, payload.length);
104 termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
// No-payload form: shifted code with the low bit clear.
107 termsHashPerField.writeVInt(1, proxCode<<1);
// Remember the current position for this term; addTerm subtracts it from
// the next position when the term recurs in the same document.
109 FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
110 postings.lastPositions[termID] = fieldState.position;
// Initializes the per-term slots of the postings array for termID using
// the current document, and updates the field-level term statistics.
// NOTE(review): the lines separating the DOCS_ONLY branch from the
// alternative are not visible in this excerpt, so exact nesting of the
// statements below should be confirmed against the full file.
115 void newTerm(final int termID) {
116 // First time we're seeing this term since the last
118 assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
120 FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
121 postings.lastDocIDs[termID] = docState.docID;
122 if (indexOptions == IndexOptions.DOCS_ONLY) {
// Docs-only form: the pending code is the raw docID.
123 postings.lastDocCodes[termID] = docState.docID;
// Form with freqs: docID shifted left one bit (addTerm may OR in the low
// bit later), and the in-document frequency starts at 1.
125 postings.lastDocCodes[termID] = docState.docID << 1;
126 postings.docFreqs[termID] = 1;
127 if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// First occurrence: the position is passed as-is, not as a delta.
128 writeProx(termID, fieldState.position);
131 fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
132 fieldState.uniqueTermCount++;
// Handles a further occurrence of a term that already has a slot (termID)
// in the postings array. When the occurrence is in a new document, the
// pending code of the previous document is written with writeVInt(0, ...)
// and the slots are re-initialized for the current document; otherwise the
// in-document frequency is incremented.
// NOTE(review): the else lines that separate the branches are not visible
// in this excerpt; the grouping described in the comments below follows
// the visible conditions and should be confirmed against the full file.
136 void addTerm(final int termID) {
138 assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
140 FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
142 assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
144 if (indexOptions == IndexOptions.DOCS_ONLY) {
// Docs-only: act only the first time the term shows up in a new document.
145 if (docState.docID != postings.lastDocIDs[termID]) {
146 assert docState.docID > postings.lastDocIDs[termID];
// Write the previous document's code, then store the docID delta as the
// new pending code.
147 termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
148 postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
149 postings.lastDocIDs[termID] = docState.docID;
150 fieldState.uniqueTermCount++;
153 if (docState.docID != postings.lastDocIDs[termID]) {
154 assert docState.docID > postings.lastDocIDs[termID];
155 // Term not yet seen in the current doc but previously
156 // seen in other doc(s) since the last flush
158 // Now that we know doc freq for previous doc,
159 // write it & lastDocCode
// A frequency of exactly 1 is folded into the low bit of the code, so no
// separate frequency value is written in that case.
160 if (1 == postings.docFreqs[termID])
161 termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
// Otherwise the code is written with the low bit clear, followed by the
// frequency.
163 termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
164 termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
// Re-initialize the slots for the current document.
166 postings.docFreqs[termID] = 1;
167 fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
// New pending code: docID delta shifted left one bit. The delta must be
// computed before lastDocIDs is overwritten on the next line.
168 postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
169 postings.lastDocIDs[termID] = docState.docID;
170 if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// First occurrence in this document: position passed as-is.
171 writeProx(termID, fieldState.position);
173 fieldState.uniqueTermCount++;
// Repeat occurrence in the same document: bump the frequency and track
// the maximum seen for the field.
175 fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
176 if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
// Position is written as a delta from the term's last recorded position.
177 writeProx(termID, fieldState.position-postings.lastPositions[termID]);
// Factory for this consumer's postings storage: returns a new
// FreqProxPostingsArray constructed with the given size.
184 ParallelPostingsArray createPostingsArray(int size) {
185 return new FreqProxPostingsArray(size);
// ParallelPostingsArray extension that adds four int arrays, each allocated
// with the constructor's size and indexed by termID in newTerm/addTerm/writeProx.
188 static final class FreqProxPostingsArray extends ParallelPostingsArray {
// Allocates the four arrays with size entries each.
// NOTE(review): original line 190 is not visible in this excerpt.
189 public FreqProxPostingsArray(int size) {
191 docFreqs = new int[size];
192 lastDocIDs = new int[size];
193 lastDocCodes = new int[size];
194 lastPositions = new int[size];
197 int docFreqs[]; // # times this term occurs in the current doc
198 int lastDocIDs[]; // Last docID where this term occurred
199 int lastDocCodes[]; // Code for prior doc
200 int lastPositions[]; // Last position where this term occurred
// Returns a fresh FreqProxPostingsArray of the requested size.
203 ParallelPostingsArray newInstance(int size) {
204 return new FreqProxPostingsArray(size);
// Copies the first numToCopy entries into toArray: the superclass copies its
// own state, then each of the four arrays here is copied with
// System.arraycopy. toArray must be a FreqProxPostingsArray (asserted, then cast).
208 void copyTo(ParallelPostingsArray toArray, int numToCopy) {
209 assert toArray instanceof FreqProxPostingsArray;
210 FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
212 super.copyTo(toArray, numToCopy);
214 System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
215 System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
216 System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
217 System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
// Per-posting byte cost: the base class constant plus four ints, one for
// each array declared in this class.
221 int bytesPerPosting() {
222 return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
// No-op: the body is empty, so nothing is done here on abort.
226 public void abort() {}