+++ /dev/null
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.util.RamUsageEstimator;
-
-// TODO: break into separate freq and prox writers as
-// codecs; make separate container (tii/tis/skip/*) that can
-// be configured as any number of files 1..N
-/**
- * Per-field consumer that buffers postings for a single field through its
- * owning {@link TermsHashPerField}. Document codes and term frequencies are
- * written to stream 0; position deltas and optional payloads are written to
- * stream 1. Instances sort by field name.
- */
-final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
-
-  final FreqProxTermsWriterPerThread perThread;
-  final TermsHashPerField termsHashPerField;
-  final FieldInfo fieldInfo;
-  final DocumentsWriter.DocState docState;
-  final FieldInvertState fieldState;
-
-  /** Index options captured from {@link #fieldInfo}; refreshed by {@link #reset()}. */
-  IndexOptions indexOptions;
-
-  /** Payload attribute of the field currently being inverted, or null if it has none. */
-  PayloadAttribute payloadAttribute;
-
-  /** Set to true the first time a non-empty payload is written by {@link #writeProx}. */
-  boolean hasPayloads;
-
-  /**
-   * Wires this consumer to its hash, its per-thread writer and its field.
-   * The doc state and invert state are shared with {@code termsHashPerField}.
-   */
-  public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
-    this.termsHashPerField = termsHashPerField;
-    this.perThread = perThread;
-    this.fieldInfo = fieldInfo;
-    this.docState = termsHashPerField.docState;
-    this.fieldState = termsHashPerField.fieldState;
-    this.indexOptions = fieldInfo.indexOptions;
-  }
-
-  /**
-   * Returns 2 (doc/freq stream plus prox stream) when the field indexes
-   * positions, and 1 (doc/freq stream only) otherwise.
-   */
-  @Override
-  int getStreamCount() {
-    return fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS ? 2 : 1;
-  }
-
-  /** Nothing to do when the field finishes. */
-  @Override
-  void finish() {}
-
-  /** Nothing to do when an over-long term is skipped. */
-  @Override
-  void skippingLongTerm() throws IOException {}
-
-  /** Orders instances by the name of the field they write. */
-  public int compareTo(FreqProxTermsWriterPerField other) {
-    final String mine = fieldInfo.name;
-    final String theirs = other.fieldInfo.name;
-    return mine.compareTo(theirs);
-  }
-
-  /**
-   * Re-reads the index options from the field info, fixing up front whether
-   * the in-RAM format carries term freqs, and drops the payload attribute.
-   */
-  void reset() {
-    payloadAttribute = null;
-    indexOptions = fieldInfo.indexOptions;
-  }
-
-  /**
-   * Reports whether any of the first {@code count} entries of {@code fields}
-   * is indexed.
-   */
-  @Override
-  boolean start(Fieldable[] fields, int count) {
-    for (int idx = 0; idx < count; idx++) {
-      if (fields[idx].isIndexed()) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Picks up the payload attribute from the current attribute source, or
-   * clears it when the source does not expose one.
-   */
-  @Override
-  void start(Fieldable f) {
-    final boolean hasPayloadAtt = fieldState.attributeSource.hasAttribute(PayloadAttribute.class);
-    payloadAttribute = hasPayloadAtt
-        ? fieldState.attributeSource.getAttribute(PayloadAttribute.class)
-        : null;
-  }
-
-  /**
-   * Appends one position entry for {@code termID} to stream 1 and records the
-   * current position as the term's last position.
-   *
-   * <p>The code is written shifted left by one bit. The low bit is set when a
-   * non-empty payload follows, in which case the payload length and bytes are
-   * written right after the code.
-   *
-   * @param termID   term whose position is being recorded
-   * @param proxCode position value (absolute or delta, as supplied by the caller)
-   */
-  void writeProx(final int termID, int proxCode) {
-    final Payload payload = (payloadAttribute == null) ? null : payloadAttribute.getPayload();
-    final boolean payloadPresent = payload != null && payload.length > 0;
-
-    if (!payloadPresent) {
-      termsHashPerField.writeVInt(1, proxCode << 1);
-    } else {
-      termsHashPerField.writeVInt(1, (proxCode << 1) | 1);
-      termsHashPerField.writeVInt(1, payload.length);
-      termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
-      hasPayloads = true;
-    }
-
-    final FreqProxPostingsArray slots = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-    slots.lastPositions[termID] = fieldState.position;
-  }
-
-  /**
-   * Initializes the per-term state for a term that has not been seen since
-   * the last flush.
-   *
-   * <p>For DOCS_ONLY fields the pending doc code is the plain docID. Otherwise
-   * it is the docID shifted left by one, the in-doc frequency starts at 1, and
-   * the first position is written when positions are indexed.
-   */
-  @Override
-  void newTerm(final int termID) {
-    assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
-
-    final FreqProxPostingsArray slots = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-    final int currentDoc = docState.docID;
-
-    slots.lastDocIDs[termID] = currentDoc;
-    if (indexOptions != IndexOptions.DOCS_ONLY) {
-      slots.lastDocCodes[termID] = currentDoc << 1;
-      slots.docFreqs[termID] = 1;
-      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-        writeProx(termID, fieldState.position);
-      }
-    } else {
-      slots.lastDocCodes[termID] = currentDoc;
-    }
-
-    fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
-    fieldState.uniqueTermCount++;
-  }
-
-  /**
-   * Records another occurrence of a term that is already in the hash.
-   *
-   * <p>If the occurrence is in the same document as the previous one, only the
-   * in-doc frequency (and position delta, when indexed) is updated; for
-   * DOCS_ONLY fields nothing happens. If the occurrence is the first one in a
-   * new document, the pending entry for the previous document is flushed to
-   * stream 0 and a new pending entry is started.
-   */
-  @Override
-  void addTerm(final int termID) {
-
-    assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
-
-    final FreqProxPostingsArray slots = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-
-    assert indexOptions == IndexOptions.DOCS_ONLY || slots.docFreqs[termID] > 0;
-
-    final boolean docsOnly = indexOptions == IndexOptions.DOCS_ONLY;
-    final boolean firstInThisDoc = docState.docID != slots.lastDocIDs[termID];
-
-    if (!firstInThisDoc) {
-      // Repeat occurrence within the current doc; DOCS_ONLY tracks nothing here.
-      if (!docsOnly) {
-        final int bumpedFreq = ++slots.docFreqs[termID];
-        fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, bumpedFreq);
-        if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-          writeProx(termID, fieldState.position - slots.lastPositions[termID]);
-        }
-      }
-      return;
-    }
-
-    // Term seen in earlier doc(s) since the last flush, but not yet in this one.
-    assert docState.docID > slots.lastDocIDs[termID];
-
-    if (docsOnly) {
-      // Flush the previous doc's code, then stage the delta to the current doc.
-      termsHashPerField.writeVInt(0, slots.lastDocCodes[termID]);
-      slots.lastDocCodes[termID] = docState.docID - slots.lastDocIDs[termID];
-      slots.lastDocIDs[termID] = docState.docID;
-      fieldState.uniqueTermCount++;
-      return;
-    }
-
-    // The previous doc's frequency is now final: a freq of 1 is folded into
-    // the low bit of the doc code, any other freq follows as its own VInt.
-    if (slots.docFreqs[termID] == 1) {
-      termsHashPerField.writeVInt(0, slots.lastDocCodes[termID] | 1);
-    } else {
-      termsHashPerField.writeVInt(0, slots.lastDocCodes[termID]);
-      termsHashPerField.writeVInt(0, slots.docFreqs[termID]);
-    }
-
-    slots.docFreqs[termID] = 1;
-    fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
-    slots.lastDocCodes[termID] = (docState.docID - slots.lastDocIDs[termID]) << 1;
-    slots.lastDocIDs[termID] = docState.docID;
-    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
-      writeProx(termID, fieldState.position);
-    }
-    fieldState.uniqueTermCount++;
-  }
-
-  /** Creates the parallel postings array used by this consumer. */
-  @Override
-  ParallelPostingsArray createPostingsArray(int size) {
-    return new FreqProxPostingsArray(size);
-  }
-
-  /**
-   * Parallel postings array extended with the four int columns this writer
-   * keeps per term.
-   */
-  static final class FreqProxPostingsArray extends ParallelPostingsArray {
-
-    /** Number of times the term occurs in the current doc. */
-    int[] docFreqs;
-    /** Last docID in which the term occurred. */
-    int[] lastDocIDs;
-    /** Pending code for the prior doc, not yet written to the stream. */
-    int[] lastDocCodes;
-    /** Last position at which the term occurred. */
-    int[] lastPositions;
-
-    /** Allocates every column with {@code size} slots. */
-    public FreqProxPostingsArray(int size) {
-      super(size);
-      docFreqs = new int[size];
-      lastDocIDs = new int[size];
-      lastDocCodes = new int[size];
-      lastPositions = new int[size];
-    }
-
-    /** Returns a fresh, empty array of the same concrete type. */
-    @Override
-    ParallelPostingsArray newInstance(int size) {
-      return new FreqProxPostingsArray(size);
-    }
-
-    /**
-     * Copies the first {@code numToCopy} slots of every column, including the
-     * superclass columns, into {@code toArray}, which must be a
-     * {@code FreqProxPostingsArray}.
-     */
-    @Override
-    void copyTo(ParallelPostingsArray toArray, int numToCopy) {
-      assert toArray instanceof FreqProxPostingsArray;
-      final FreqProxPostingsArray target = (FreqProxPostingsArray) toArray;
-
-      super.copyTo(toArray, numToCopy);
-
-      System.arraycopy(docFreqs, 0, target.docFreqs, 0, numToCopy);
-      System.arraycopy(lastDocIDs, 0, target.lastDocIDs, 0, numToCopy);
-      System.arraycopy(lastDocCodes, 0, target.lastDocCodes, 0, numToCopy);
-      System.arraycopy(lastPositions, 0, target.lastPositions, 0, numToCopy);
-    }
-
-    /** Base per-posting cost plus the four int columns added by this class. */
-    @Override
-    int bytesPerPosting() {
-      final int extraColumns = 4 * RamUsageEstimator.NUM_BYTES_INT;
-      return ParallelPostingsArray.BYTES_PER_POSTING + extraColumns;
-    }
-  }
-
-  /** Nothing to discard on abort. */
-  public void abort() {}
-}
-