1 package org.apache.lucene.index;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.Reader;
22 import org.apache.lucene.document.Fieldable;
23 import org.apache.lucene.analysis.TokenStream;
24 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
25 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
28 * Holds state for inverting all occurrences of a single
29 * field in the document. This class doesn't do anything
30 * itself; instead, it forwards the tokens produced by
31 * analysis to its own consumer
32 * (InvertedDocConsumerPerField). It also interacts with an
33 * endConsumer (InvertedDocEndConsumerPerField).
final class DocInverterPerField extends DocFieldConsumerPerField {

  // Owning per-thread inverter; supplies the shared docState/fieldState below.
  final private DocInverterPerThread perThread;
  // Metadata for the single field this instance inverts.
  final private FieldInfo fieldInfo;
  // Downstream consumer that receives each token occurrence of this field.
  final InvertedDocConsumerPerField consumer;
  // End-of-inversion consumer; finish() is called after all values are processed.
  final InvertedDocEndConsumerPerField endConsumer;
  // Per-document state shared across fields (doc, analyzer, maxFieldLength,
  // infoStream, docWriter).
  final DocumentsWriter.DocState docState;
  // Mutable accounting for the field being inverted: position, offset,
  // length, numOverlap, boost, attributeSource.
  final FieldInvertState fieldState;
  /**
   * Sole constructor: borrows the shared per-document and per-field state
   * from the owning thread and registers this field with both downstream
   * chains (token consumer and end consumer).
   *
   * @param perThread owning per-thread inverter supplying shared state
   * @param fieldInfo metadata for the field to be inverted
   */
  public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
    this.perThread = perThread;
    this.fieldInfo = fieldInfo;
    docState = perThread.docState;
    fieldState = perThread.fieldState;
    // Each chain returns its own per-field processor for this field.
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
  /**
   * Inverts all occurrences of this field in the current document: for each
   * indexed value, feeds either a single un-tokenized token or the analyzer's
   * token stream to {@code consumer}, while tracking position, offset, length
   * and boost accounting in {@code fieldState}.
   *
   * @param fields all Fieldable instances sharing this field's name in the doc
   * @param count  number of valid entries in {@code fields}
   * @throws IOException if analysis or token-stream iteration fails
   */
  public void processFields(final Fieldable[] fields,
                            final int count) throws IOException {

    // Start each document from a clean per-field state, seeded with the
    // document-level boost.
    fieldState.reset(docState.doc.getBoost());

    final int maxFieldLength = docState.maxFieldLength;

    // The consumer may decline to invert this field at all.
    final boolean doInvert = consumer.start(fields, count);

    for(int i=0;i<count;i++) {

      final Fieldable field = fields[i];

      // TODO FI: this should be "genericized" to querying
      // consumer if it wants to see this particular field

      if (field.isIndexed() && doInvert) {

        // Between multiple values of the same field name, insert the
        // analyzer's configured position-increment gap (0 when no analyzer).
        fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {               // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          // Reuse the per-thread single token instead of allocating one.
          perThread.singleToken.reinit(stringValue, 0, valueLength);
          fieldState.attributeSource = perThread.singleToken;
          consumer.start(field);

          boolean success = false;
          // NOTE(review): the try/finally around the consumer call is not
          // visible in this chunk; setAborting() presumably fires only when
          // the consumer throws — confirm against the full source.
          docState.docWriter.setAborting();

          // One token consumed: advance offset by its length and position by 1.
          fieldState.offset += valueLength;

          fieldState.position++;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          // Prefer a TokenStream supplied directly on the field.
          if (streamValue != null)
            stream = streamValue;
          // the field does not have a TokenStream,
          // so we have to obtain one from the analyzer
          final Reader reader;                    // find or make Reader
          final Reader readerValue = field.readerValue();

          if (readerValue != null)
            reader = readerValue;
          String stringValue = field.stringValue();
          if (stringValue == null)
            throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
          // Reuse the per-thread string reader to wrap the String value.
          perThread.stringReader.init(stringValue);
          reader = perThread.stringReader;

          // Tokenize field and add to postingTable
          stream = docState.analyzer.reusableTokenStream(fieldInfo.name, reader);

          // reset the TokenStream to the first token

          // Remember the token count before this value, so length growth
          // for this value can be attributed correctly.
          final int startLength = fieldState.length;

          boolean hasMoreTokens = stream.incrementToken();

          fieldState.attributeSource = stream;

          // Attributes are fetched once up front; the stream mutates them
          // in place on every incrementToken().
          OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
          PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);

          consumer.start(field);

          // If we hit an exception in stream.next below
          // (which is fairly common, eg if analyzer
          // chokes on a given document), then it's
          // non-aborting and (above) this one document
          // will be marked as deleted, but still

          if (!hasMoreTokens) break;

          final int posIncr = posIncrAttribute.getPositionIncrement();
          fieldState.position += posIncr;
          // position is kept as "next position minus one" internally; a
          // positive value is decremented here and re-incremented below.
          if (fieldState.position > 0) {
            fieldState.position--;
          // A zero position increment means this token overlaps the previous.
          fieldState.numOverlap++;

          boolean success = false;
          // If we hit an exception in here, we abort
          // all buffered documents since the last
          // flush, on the likelihood that the
          // internal state of the consumer is now
          // corrupt and should not be flushed to a
          docState.docWriter.setAborting();

          fieldState.position++;
          // Stop consuming tokens once the configured cap is reached.
          if (++fieldState.length >= maxFieldLength) {
            if (docState.infoStream != null)
              docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");

          hasMoreTokens = stream.incrementToken();

          // trigger streams to perform end-of-stream operations

          // NOTE(review): stream.end() appears elided in this chunk; the
          // running offset is advanced by the final token's end offset.
          fieldState.offset += offsetAttribute.endOffset();

        // After each value: add the analyzer's offset gap (0 when no
        // analyzer) and fold in the value's boost.
        fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
        fieldState.boost *= field.getBoost();

      // LUCENE-2387: don't hang onto the field, so GC can

    // Let the end consumer finalize this field (e.g. norms).
    endConsumer.finish();