+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * Holds state for inverting all occurrences of a single
- * field in the document. This class doesn't do anything
- * itself; instead, it forwards the tokens produced by
- * analysis to its own consumer
- * (InvertedDocConsumerPerField). It also interacts with an
- * endConsumer (InvertedDocEndConsumerPerField).
- */
-
-final class DocInverterPerField extends DocFieldConsumerPerField {
-
-  private final DocInverterPerThread perThread;
-  private final FieldInfo fieldInfo;
-  final InvertedDocConsumerPerField consumer;
-  final InvertedDocEndConsumerPerField endConsumer;
-  final DocumentsWriter.DocState docState;
-  final FieldInvertState fieldState;
-
-  /**
-   * Creates the per-field inverter and registers the field with both the
-   * consumer and the endConsumer of the owning thread.
-   *
-   * @param perThread per-thread inverter supplying the shared docState,
-   *                  fieldState and the two per-thread consumers
-   * @param fieldInfo the field this instance inverts
-   */
-  public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
-    this.perThread = perThread;
-    this.fieldInfo = fieldInfo;
-    docState = perThread.docState;
-    fieldState = perThread.fieldState;
-    // docState/fieldState are assigned before addField since "this" is
-    // handed to the consumers here.
-    this.consumer = perThread.consumer.addField(this, fieldInfo);
-    this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
-  }
-
-  /**
-   * Aborts the consumer and then the endConsumer. The endConsumer is
-   * aborted even when aborting the consumer throws.
-   */
-  @Override
-  void abort() {
-    try {
-      consumer.abort();
-    } finally {
-      endConsumer.abort();
-    }
-  }
-
-  /**
-   * Inverts the first <code>count</code> entries of <code>fields</code> (all
-   * instances of this one field in the current document), forwarding each
-   * token to the consumer, and finally calls <code>finish()</code> on the
-   * consumer and the endConsumer.
-   *
-   * <p>Every processed slot of <code>fields</code> is set to
-   * <code>null</code> on the way out (LUCENE-2387).
-   *
-   * @param fields the field instances to invert; entries are nulled out
-   * @param count  number of leading entries of <code>fields</code> to process
-   * @throws IOException if thrown by the token stream or the consumers
-   * @throws IllegalArgumentException if a tokenized field has neither a
-   *         TokenStream, a Reader nor a String value
-   */
-  @Override
-  public void processFields(final Fieldable[] fields,
-                            final int count) throws IOException {
-
-    fieldState.reset(docState.doc.getBoost());
-
-    final int maxFieldLength = docState.maxFieldLength;
-
-    final boolean doInvert = consumer.start(fields, count);
-
-    for (int i = 0; i < count; i++) {
-
-      final Fieldable field = fields[i];
-
-      // TODO FI: this should be "genericized" to querying
-      // consumer if it wants to see this particular field
-      // tokenized.
-      if (field.isIndexed() && doInvert) {
-
-        // Separate successive instances of the same field by the
-        // analyzer's position increment gap (no gap without an analyzer).
-        if (i > 0) {
-          fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
-        }
-
-        if (!field.isTokenized()) {
-          // Un-tokenized field: the whole string value is handed to the
-          // consumer through the per-thread singleToken attribute source.
-          final String stringValue = field.stringValue();
-          final int valueLength = stringValue.length();
-          perThread.singleToken.reinit(stringValue, 0, valueLength);
-          fieldState.attributeSource = perThread.singleToken;
-          consumer.start(field);
-
-          boolean success = false;
-          try {
-            consumer.add();
-            success = true;
-          } finally {
-            if (!success) {
-              docState.docWriter.setAborting();
-            }
-          }
-          fieldState.offset += valueLength;
-          fieldState.length++;
-          fieldState.position++;
-        } else {
-          // Tokenized field: prefer a pre-built TokenStream, otherwise
-          // analyze the Reader or String value.
-          final TokenStream stream;
-          final TokenStream streamValue = field.tokenStreamValue();
-
-          if (streamValue != null) {
-            stream = streamValue;
-          } else {
-            // the field does not have a TokenStream,
-            // so we have to obtain one from the analyzer
-            final Reader reader; // find or make Reader
-            final Reader readerValue = field.readerValue();
-
-            if (readerValue != null) {
-              reader = readerValue;
-            } else {
-              final String stringValue = field.stringValue();
-              if (stringValue == null) {
-                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
-              }
-              perThread.stringReader.init(stringValue);
-              reader = perThread.stringReader;
-            }
-
-            // Tokenize field and add to postingTable
-            stream = docState.analyzer.reusableTokenStream(fieldInfo.name, reader);
-          }
-
-          try {
-            // reset the TokenStream to the first token; done inside the
-            // try so that the stream is still closed if reset() throws
-            stream.reset();
-
-            boolean hasMoreTokens = stream.incrementToken();
-
-            fieldState.attributeSource = stream;
-
-            final OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
-            final PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
-
-            consumer.start(field);
-
-            // If we hit an exception in stream.incrementToken()
-            // (which is fairly common, eg if analyzer
-            // chokes on a given document), then it's
-            // non-aborting and (above) this one document
-            // will be marked as deleted, but still
-            // consume a docID
-            while (hasMoreTokens) {
-
-              final int posIncr = posIncrAttribute.getPositionIncrement();
-              fieldState.position += posIncr;
-              // Step back by one while adding the token; the position is
-              // advanced again right after consumer.add() below.
-              if (fieldState.position > 0) {
-                fieldState.position--;
-              }
-
-              if (posIncr == 0) {
-                fieldState.numOverlap++;
-              }
-
-              boolean success = false;
-              try {
-                // If we hit an exception in here, we abort
-                // all buffered documents since the last
-                // flush, on the likelihood that the
-                // internal state of the consumer is now
-                // corrupt and should not be flushed to a
-                // new segment:
-                consumer.add();
-                success = true;
-              } finally {
-                if (!success) {
-                  docState.docWriter.setAborting();
-                }
-              }
-              fieldState.position++;
-              if (++fieldState.length >= maxFieldLength) {
-                if (docState.infoStream != null) {
-                  docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
-                }
-                break;
-              }
-
-              hasMoreTokens = stream.incrementToken();
-            }
-            // trigger streams to perform end-of-stream operations
-            stream.end();
-
-            fieldState.offset += offsetAttribute.endOffset();
-          } finally {
-            stream.close();
-          }
-        }
-
-        fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
-        fieldState.boost *= field.getBoost();
-      }
-
-      // LUCENE-2387: don't hang onto the field, so GC can
-      // reclaim
-      fields[i] = null;
-    }
-
-    consumer.finish();
-    endConsumer.finish();
-  }
-}