package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * Holds state for inverting all occurrences of a single
 * field in the document.  This class doesn't do anything
 * itself; instead, it forwards the tokens produced by
 * analysis to its own consumer
 * (InvertedDocConsumerPerField).  It also interacts with an
 * endConsumer (InvertedDocEndConsumerPerField).
 */

final class DocInverterPerField extends DocFieldConsumerPerField {

  private final DocInverterPerThread perThread;
  private final FieldInfo fieldInfo;
  final InvertedDocConsumerPerField consumer;
  final InvertedDocEndConsumerPerField endConsumer;
  final DocumentsWriter.DocState docState;
  final FieldInvertState fieldState;

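  // The constructor wires this field into both downstream chains:
  // addField returns the per-field consumer (in the default indexing
  // chain, the terms hash that builds postings and term vectors) and
  // the per-field end consumer (typically the norms writer).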
  public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
    this.perThread = perThread;
    this.fieldInfo = fieldInfo;
    docState = perThread.docState;
    fieldState = perThread.fieldState;
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
  }

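  // Abort both chains; the try/finally guarantees endConsumer.abort()
  // runs even if consumer.abort() throws.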
  @Override
  void abort() {
    try {
      consumer.abort();
    } finally {
      endConsumer.abort();
    }
  }

  @Override
  public void processFields(final Fieldable[] fields,
                            final int count) throws IOException {

    fieldState.reset(docState.doc.getBoost());

    final int maxFieldLength = docState.maxFieldLength;

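    // Ask the consumer whether these field instances need to be
    // inverted at all; even when they don't, we still loop below so
    // the fields array can be cleared for GC (see LUCENE-2387).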
    final boolean doInvert = consumer.start(fields, count);

    for (int i = 0; i < count; i++) {

      final Fieldable field = fields[i];

      // TODO FI: this should be "genericized": ask the consumer
      // whether it wants to see this particular field tokenized.
      if (field.isIndexed() && doInvert) {

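        // Multiple instances of the same field name are logically
        // concatenated; the analyzer's position increment gap keeps
        // phrase and span queries from matching across the boundary.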
        if (i > 0)
          fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {               // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          perThread.singleToken.reinit(stringValue, 0, valueLength);
          fieldState.attributeSource = perThread.singleToken;
          consumer.start(field);

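          // As in the tokenized case below, a failure in add() marks
          // the writer as aborting, since the consumer's internal
          // state may now be corrupt.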
          boolean success = false;
          try {
            consumer.add();
            success = true;
          } finally {
            if (!success)
              docState.docWriter.setAborting();
          }
          fieldState.offset += valueLength;
          fieldState.length++;
          fieldState.position++;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

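          // Value precedence: an explicit TokenStream wins, then a
          // Reader, then the String value wrapped in the per-thread
          // reusable string reader; a field with none of these is an
          // error.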
          if (streamValue != null)
            stream = streamValue;
          else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;                          // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
              reader = readerValue;
            else {
              String stringValue = field.stringValue();
              if (stringValue == null)
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
              perThread.stringReader.init(stringValue);
              reader = perThread.stringReader;
            }

            // Tokenize the field value with the analyzer's reusable
            // token stream
            stream = docState.analyzer.reusableTokenStream(fieldInfo.name, reader);
          }

          // reset the TokenStream to the first token
          stream.reset();

          final int startLength = fieldState.length;

          try {
            boolean hasMoreTokens = stream.incrementToken();

            fieldState.attributeSource = stream;

            OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);

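            // The consumer pulls its own attributes (e.g. the term
            // attribute) from fieldState.attributeSource, so the
            // source must be set before start(field) is called.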
            consumer.start(field);

            for (;;) {

              // If we hit an exception in stream.incrementToken()
              // below (which is fairly common, eg if the analyzer
              // chokes on a given document), then it's non-aborting
              // and (above) this one document will be marked as
              // deleted, but will still consume a docID

              if (!hasMoreTokens) break;

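              // fieldState.position is decremented here and then
              // incremented again after consumer.add(), so during
              // add() it holds the 0-based position of this token.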
              final int posIncr = posIncrAttribute.getPositionIncrement();
              fieldState.position += posIncr;
              if (fieldState.position > 0) {
                fieldState.position--;
              }

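              // a zero position increment means this token overlaps
              // the previous one, e.g. a stacked synonym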
              if (posIncr == 0)
                fieldState.numOverlap++;

              boolean success = false;
              try {
                // If we hit an exception in here, we abort
                // all buffered documents since the last
                // flush, on the likelihood that the
                // internal state of the consumer is now
                // corrupt and should not be flushed to a
                // new segment:
                consumer.add();
                success = true;
              } finally {
                if (!success)
                  docState.docWriter.setAborting();
              }
              fieldState.position++;
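              // Honor IndexWriter's maxFieldLength: once the limit is
              // reached, the remaining tokens for this field are
              // silently dropped (apart from the infoStream note).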
              if (++fieldState.length >= maxFieldLength) {
                if (docState.infoStream != null)
                  docState.infoStream.println("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                break;
              }

              hasMoreTokens = stream.incrementToken();
            }
            // trigger streams to perform end-of-stream operations
            stream.end();

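            // Accumulate the final offset reported by end() so that a
            // following instance of this same field starts at the
            // correct offset.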
            fieldState.offset += offsetAttribute.endOffset();
          } finally {
            stream.close();
          }
        }

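        // Analogous to the position increment gap above: the
        // analyzer's offset gap keeps the character offsets of
        // successive instances of this field from overlapping.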
        fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
        fieldState.boost *= field.getBoost();
      }

      // LUCENE-2387: don't hang onto the field, so GC can
      // reclaim
      fields[i] = null;
    }

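    // Per-document cleanup in both chains; e.g. the end consumer
    // records this document's norms here.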
    consumer.finish();
    endConsumer.finish();
  }
}