add --shared
[pylucene.git] / lucene-java-3.4.0 / lucene / src / java / org / apache / lucene / index / FreqProxTermsWriterPerField.java
1 package org.apache.lucene.index;
2
3 /**
4  * Licensed to the Apache Software Foundation (ASF) under one or more
5  * contributor license agreements.  See the NOTICE file distributed with
6  * this work for additional information regarding copyright ownership.
7  * The ASF licenses this file to You under the Apache License, Version 2.0
8  * (the "License"); you may not use this file except in compliance with
9  * the License.  You may obtain a copy of the License at
10  *
11  *     http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 import java.io.IOException;
21
22 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
23 import org.apache.lucene.document.Fieldable;
24 import org.apache.lucene.index.FieldInfo.IndexOptions;
25 import org.apache.lucene.util.RamUsageEstimator;
26
27 // TODO: break into separate freq and prox writers as
28 // codecs; make separate container (tii/tis/skip/*) that can
29 // be configured as any number of files 1..N
30 final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
31
32   final FreqProxTermsWriterPerThread perThread;
33   final TermsHashPerField termsHashPerField;
34   final FieldInfo fieldInfo;
35   final DocumentsWriter.DocState docState;
36   final FieldInvertState fieldState;
37   IndexOptions indexOptions;
38   PayloadAttribute payloadAttribute;
39
40   public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
41     this.termsHashPerField = termsHashPerField;
42     this.perThread = perThread;
43     this.fieldInfo = fieldInfo;
44     docState = termsHashPerField.docState;
45     fieldState = termsHashPerField.fieldState;
46     indexOptions = fieldInfo.indexOptions;
47   }
48
49   @Override
50   int getStreamCount() {
51     if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
52       return 1;
53     else
54       return 2;
55   }
56
57   @Override
58   void finish() {}
59
60   boolean hasPayloads;
61
62   @Override
63   void skippingLongTerm() throws IOException {}
64
65   public int compareTo(FreqProxTermsWriterPerField other) {
66     return fieldInfo.name.compareTo(other.fieldInfo.name);
67   }
68
69   void reset() {
70     // Record, up front, whether our in-RAM format will be
71     // with or without term freqs:
72     indexOptions = fieldInfo.indexOptions;
73     payloadAttribute = null;
74   }
75
76   @Override
77   boolean start(Fieldable[] fields, int count) {
78     for(int i=0;i<count;i++)
79       if (fields[i].isIndexed())
80         return true;
81     return false;
82   }     
83   
84   @Override
85   void start(Fieldable f) {
86     if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
87       payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
88     } else {
89       payloadAttribute = null;
90     }
91   }
92
93   void writeProx(final int termID, int proxCode) {
94     final Payload payload;
95     if (payloadAttribute == null) {
96       payload = null;
97     } else {
98       payload = payloadAttribute.getPayload();
99     }
100     
101     if (payload != null && payload.length > 0) {
102       termsHashPerField.writeVInt(1, (proxCode<<1)|1);
103       termsHashPerField.writeVInt(1, payload.length);
104       termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
105       hasPayloads = true;      
106     } else
107       termsHashPerField.writeVInt(1, proxCode<<1);
108     
109     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
110     postings.lastPositions[termID] = fieldState.position;
111     
112   }
113
114   @Override
115   void newTerm(final int termID) {
116     // First time we're seeing this term since the last
117     // flush
118     assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
119     
120     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
121     postings.lastDocIDs[termID] = docState.docID;
122     if (indexOptions == IndexOptions.DOCS_ONLY) {
123       postings.lastDocCodes[termID] = docState.docID;
124     } else {
125       postings.lastDocCodes[termID] = docState.docID << 1;
126       postings.docFreqs[termID] = 1;
127       if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
128         writeProx(termID, fieldState.position);
129       }
130     }
131     fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
132     fieldState.uniqueTermCount++;
133   }
134
135   @Override
136   void addTerm(final int termID) {
137
138     assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
139     
140     FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
141     
142     assert indexOptions == IndexOptions.DOCS_ONLY || postings.docFreqs[termID] > 0;
143
144     if (indexOptions == IndexOptions.DOCS_ONLY) {
145       if (docState.docID != postings.lastDocIDs[termID]) {
146         assert docState.docID > postings.lastDocIDs[termID];
147         termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
148         postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID];
149         postings.lastDocIDs[termID] = docState.docID;
150         fieldState.uniqueTermCount++;
151       }
152     } else {
153       if (docState.docID != postings.lastDocIDs[termID]) {
154         assert docState.docID > postings.lastDocIDs[termID];
155         // Term not yet seen in the current doc but previously
156         // seen in other doc(s) since the last flush
157
158         // Now that we know doc freq for previous doc,
159         // write it & lastDocCode
160         if (1 == postings.docFreqs[termID])
161           termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
162         else {
163           termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
164           termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
165         }
166         postings.docFreqs[termID] = 1;
167         fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
168         postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
169         postings.lastDocIDs[termID] = docState.docID;
170         if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
171           writeProx(termID, fieldState.position);
172         }
173         fieldState.uniqueTermCount++;
174       } else {
175         fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
176         if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
177           writeProx(termID, fieldState.position-postings.lastPositions[termID]);
178         }
179       }
180     }
181   }
182   
183   @Override
184   ParallelPostingsArray createPostingsArray(int size) {
185     return new FreqProxPostingsArray(size);
186   }
187
188   static final class FreqProxPostingsArray extends ParallelPostingsArray {
189     public FreqProxPostingsArray(int size) {
190       super(size);
191       docFreqs = new int[size];
192       lastDocIDs = new int[size];
193       lastDocCodes = new int[size];
194       lastPositions = new int[size];
195     }
196
197     int docFreqs[];                                    // # times this term occurs in the current doc
198     int lastDocIDs[];                                  // Last docID where this term occurred
199     int lastDocCodes[];                                // Code for prior doc
200     int lastPositions[];                               // Last position where this term occurred
201
202     @Override
203     ParallelPostingsArray newInstance(int size) {
204       return new FreqProxPostingsArray(size);
205     }
206
207     @Override
208     void copyTo(ParallelPostingsArray toArray, int numToCopy) {
209       assert toArray instanceof FreqProxPostingsArray;
210       FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
211
212       super.copyTo(toArray, numToCopy);
213
214       System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
215       System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy);
216       System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy);
217       System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy);
218     }
219
220     @Override
221     int bytesPerPosting() {
222       return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
223     }
224   }
225   
226   public void abort() {}
227 }
228