X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java new file mode 100644 index 0000000..2a75dd7 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java @@ -0,0 +1,119 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

import java.io.IOException;

import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;

// TODO FI: some of this is "generic" to TermsHash* so we
// should factor it out so other consumers don't have to
// duplicate this code

/**
 * Used by DocumentsWriter to merge the postings from
 * multiple ThreadStates when creating a segment.
 *
 * <p>An instance walks the terms of a single field in the order returned by
 * {@code sortPostings()}: {@link #nextTerm()} advances to the next term and
 * positions on its first document, and {@link #nextDoc()} steps through the
 * remaining documents of that term.
 */
final class FreqProxFieldMergeState {

  final FreqProxTermsWriterPerField field;
  final int numPostings;
  final CharBlockPool charPool;
  final int[] termIDs;
  final FreqProxPostingsArray postings;
  int currentTermID;

  // Buffer holding the current term's text, and the offset at which it starts.
  char[] text;
  int textOffset;

  // Index into termIDs of the current term; -1 until nextTerm() is first called.
  private int postingUpto = -1;

  final ByteSliceReader freq = new ByteSliceReader();
  final ByteSliceReader prox = new ByteSliceReader();

  // Current document and its term frequency, updated by nextDoc().
  int docID;
  int termFreq;

  /**
   * Captures the per-field state needed to iterate the field's postings.
   *
   * @param field the per-field writer whose postings will be iterated
   */
  public FreqProxFieldMergeState(FreqProxTermsWriterPerField field) {
    this.field = field;
    this.charPool = field.perThread.termsHashPerThread.charPool;
    // Read the posting count before asking for the sorted term ids, as the
    // original code does.
    this.numPostings = field.termsHashPerField.numPostings;
    this.termIDs = field.termsHashPerField.sortPostings();
    this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray;
  }

  /**
   * Advances to the next term and positions this state on that term's first
   * document.
   *
   * @return {@code false} once every posting has been visited, {@code true} otherwise
   * @throws IOException if reading the first document fails
   */
  boolean nextTerm() throws IOException {
    if (++postingUpto == numPostings) {
      return false;
    }

    currentTermID = termIDs[postingUpto];
    // Doc ids are accumulated from deltas, so start again from zero.
    docID = 0;

    // Locate the term text: the high bits select the char buffer, the low
    // bits give the offset inside it.
    final int start = postings.textStarts[currentTermID];
    text = charPool.buffers[start >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    textOffset = start & DocumentsWriter.CHAR_BLOCK_MASK;

    field.termsHashPerField.initReader(freq, currentTermID, 0);
    if (field.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
      field.termsHashPerField.initReader(prox, currentTermID, 1);
    }

    // Should always be true. The call stays outside the assert so that it
    // also runs when assertions are disabled.
    final boolean hasFirstDoc = nextDoc();
    assert hasFirstDoc;

    return true;
  }

  /**
   * Returns the text of the current term.
   *
   * @return the chars from {@code textOffset} up to, but not including, the
   *         first {@code 0xffff} char in the current buffer
   */
  public String termText() {
    int end = textOffset;
    for (; text[end] != 0xffff; end++) {
      // scan forward to the 0xffff terminator
    }
    return new String(text, textOffset, end - textOffset);
  }

  /**
   * Advances to the next document of the current term, updating
   * {@code docID} and, unless the field is indexed as {@code DOCS_ONLY},
   * {@code termFreq}.
   *
   * @return {@code true} if a document was read, {@code false} when the term
   *         has no more documents
   * @throws IOException if reading from the freq stream fails
   */
  public boolean nextDoc() throws IOException {
    if (freq.eof()) {
      if (postings.lastDocCodes[currentTermID] == -1) {
        // EOF
        return false;
      }

      // Return last doc: it is taken from the postings arrays rather than
      // from the freq stream.
      docID = postings.lastDocIDs[currentTermID];
      if (field.indexOptions != IndexOptions.DOCS_ONLY) {
        termFreq = postings.docFreqs[currentTermID];
      }
      // -1 flags the last doc as already returned, so the next call hits EOF.
      postings.lastDocCodes[currentTermID] = -1;
      return true;
    }

    final int docCode = freq.readVInt();
    if (field.indexOptions == IndexOptions.DOCS_ONLY) {
      // The code is the plain doc id delta.
      docID += docCode;
    } else {
      // The upper bits carry the doc id delta. A set low bit means the
      // frequency is 1, otherwise the frequency follows as its own VInt.
      docID += docCode >>> 1;
      termFreq = ((docCode & 1) != 0) ? 1 : freq.readVInt();
    }

    assert docID != postings.lastDocIDs[currentTermID];

    return true;
  }
}