X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java diff --git a/lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java new file mode 100644 index 0000000..ce7d468 --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java @@ -0,0 +1,443 @@ +package org.apache.lucene.store.instantiated; + +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BitVector; + +/** + * An InstantiatedIndexReader is not a snapshot in time, it is completely in + * sync with the latest commit to the store! + *

+ * Consider using InstantiatedIndex as if it was immutable. + */ +public class InstantiatedIndexReader extends IndexReader { + + private final InstantiatedIndex index; + + public InstantiatedIndexReader(InstantiatedIndex index) { + super(); + this.index = index; + readerFinishedListeners = Collections.synchronizedSet(new HashSet()); + } + + @Deprecated + @Override + public boolean isOptimized() { + return true; + } + + /** + * An InstantiatedIndexReader is not a snapshot in time, it is completely in + * sync with the latest commit to the store! + * + * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index. + */ + @Override + public long getVersion() { + return index.getVersion(); + } + + @Override + public Directory directory() { + throw new UnsupportedOperationException(); + } + + /** + * An InstantiatedIndexReader is always current! + * + * Check whether this IndexReader is still using the current (i.e., most + * recently committed) version of the index. If a writer has committed any + * changes to the index since this reader was opened, this will return + * false, in which case you must open a new IndexReader in + * order to see the changes. See the description of the autoCommit flag + * which controls when the {@link IndexWriter} actually commits changes to the + * index. + * + * @return always true + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * @throws UnsupportedOperationException unless overridden in subclass + */ + @Override + public boolean isCurrent() throws IOException { + return true; + } + + public InstantiatedIndex getIndex() { + return index; + } + + private BitVector uncommittedDeletedDocuments; + + private Map> uncommittedNormsByFieldNameAndDocumentNumber = null; + + private class NormUpdate { + private int doc; + private byte value; + + public NormUpdate(int doc, byte value) { + this.doc = doc; + this.value = value; + } + } + + @Override + public int numDocs() { + // todo i suppose this value could be cached, but array#length and bitvector#count is fast. + int numDocs = getIndex().getDocumentsByNumber().length; + if (uncommittedDeletedDocuments != null) { + numDocs -= uncommittedDeletedDocuments.count(); + } + if (index.getDeletedDocuments() != null) { + numDocs -= index.getDeletedDocuments().count(); + } + return numDocs; + } + + @Override + public int maxDoc() { + return getIndex().getDocumentsByNumber().length; + } + + @Override + public boolean hasDeletions() { + return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null; + } + + + @Override + public boolean isDeleted(int n) { + return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n)) + || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n)); + } + + + @Override + protected void doDelete(int docNum) throws IOException { + + // dont delete if already deleted + if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum)) + || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) { + return; + } + + if (uncommittedDeletedDocuments == null) { + uncommittedDeletedDocuments = new BitVector(maxDoc()); + } + + uncommittedDeletedDocuments.set(docNum); + } + + @Override + protected void doUndeleteAll() throws IOException { + // todo: read/write lock + uncommittedDeletedDocuments = null; + // todo: read/write unlock + } + + @Override + protected void doCommit(Map commitUserData) throws IOException { + // todo: read/write lock + + // 1. update norms + if (uncommittedNormsByFieldNameAndDocumentNumber != null) { + for (Map.Entry> e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) { + byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey()); + for (NormUpdate normUpdate : e.getValue()) { + norms[normUpdate.doc] = normUpdate.value; + } + } + uncommittedNormsByFieldNameAndDocumentNumber = null; + } + + // 2. remove deleted documents + if (uncommittedDeletedDocuments != null) { + if (index.getDeletedDocuments() == null) { + index.setDeletedDocuments(uncommittedDeletedDocuments); + } else { + for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) { + if (uncommittedDeletedDocuments.get(d)) { + index.getDeletedDocuments().set(d); + } + } + } + uncommittedDeletedDocuments = null; + } + + // todo unlock read/writelock + } + + @Override + protected void doClose() throws IOException { + // ignored + // todo perhaps release all associated instances? + } + + @Override + public Collection getFieldNames(FieldOption fieldOption) { + Set fieldSet = new HashSet(); + for (FieldSetting fi : index.getFieldSettings().values()) { + if (fieldOption == IndexReader.FieldOption.ALL) { + fieldSet.add(fi.fieldName); + } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { + fieldSet.add(fi.fieldName); + } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) { + fieldSet.add(fi.fieldName); + } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) { + fieldSet.add(fi.fieldName); + } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) { + fieldSet.add(fi.fieldName); + } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) + && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { + fieldSet.add(fi.fieldName); + } + } + return fieldSet; + } + + /** + * Return the {@link org.apache.lucene.document.Document} at the nth + * position. +

+ * Warning! + * The resulting document is the actual stored document instance + * and not a deserialized clone as retuned by an IndexReader + * over a {@link org.apache.lucene.store.Directory}. + * I.e., if you need to touch the document, clone it first! + *

+ * This can also be seen as a feature for live changes of stored values, + * but be careful! Adding a field with an name unknown to the index + * or to a field with previously no stored values will make + * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)} + * out of sync, causing problems for instance when merging the + * instantiated index to another index. +

+ * This implementation ignores the field selector! All stored fields are always returned! + *

+ * + * @param n document number + * @param fieldSelector ignored + * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @see org.apache.lucene.document.Fieldable + * @see org.apache.lucene.document.FieldSelector + * @see org.apache.lucene.document.SetBasedFieldSelector + * @see org.apache.lucene.document.LoadFirstFieldSelector + */ + @Override + public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + return document(n); + } + + /** + * Returns the stored fields of the nth + * Document in this index. + *

+ * Warning! + * The resulting document is the actual stored document instance + * and not a deserialized clone as retuned by an IndexReader + * over a {@link org.apache.lucene.store.Directory}. + * I.e., if you need to touch the document, clone it first! + *

+ * This can also be seen as a feature for live changes of stored values, + * but be careful! Adding a field with an name unknown to the index + * or to a field with previously no stored values will make + * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)} + * out of sync, causing problems for instance when merging the + * instantiated index to another index. + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + + @Override + public Document document(int n) throws IOException { + return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument(); + } + + /** + * never ever touch these values. it is the true values, unless norms have + * been touched. + */ + @Override + public byte[] norms(String field) throws IOException { + byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); + if (norms == null) { + return new byte[0]; // todo a static final zero length attribute? + } + if (uncommittedNormsByFieldNameAndDocumentNumber != null) { + norms = norms.clone(); + List updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field); + if (updated != null) { + for (NormUpdate normUpdate : updated) { + norms[normUpdate.doc] = normUpdate.value; + } + } + } + return norms; + } + + @Override + public void norms(String field, byte[] bytes, int offset) throws IOException { + byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); + if (norms == null) { + return; + } + System.arraycopy(norms, 0, bytes, offset, norms.length); + } + + @Override + protected void doSetNorm(int doc, String field, byte value) throws IOException { + if (uncommittedNormsByFieldNameAndDocumentNumber == null) { + uncommittedNormsByFieldNameAndDocumentNumber = new HashMap>(getIndex().getNormsByFieldNameAndDocumentNumber().size()); + } + List list = uncommittedNormsByFieldNameAndDocumentNumber.get(field); + if (list == null) { + list = new LinkedList(); + uncommittedNormsByFieldNameAndDocumentNumber.put(field, list); + } + list.add(new NormUpdate(doc, value)); + } + + @Override + public int docFreq(Term t) throws IOException { + InstantiatedTerm term = getIndex().findTerm(t); + if (term == null) { + return 0; + } else { + return term.getAssociatedDocuments().length; + } + } + + @Override + public TermEnum terms() throws IOException { + return new InstantiatedTermEnum(this); + } + + @Override + public TermEnum terms(Term t) throws IOException { + InstantiatedTerm it = getIndex().findTerm(t); + if (it != null) { + return new InstantiatedTermEnum(this, it.getTermIndex()); + } else { + int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator); + if (startPos < 0) { + startPos = -1 - startPos; + } + return new InstantiatedTermEnum(this, startPos); + } + } + + @Override + public TermDocs termDocs() throws IOException { + return new InstantiatedTermDocs(this); + } + + + @Override + public TermDocs termDocs(Term term) throws IOException { + if (term == null) { + return new InstantiatedAllTermDocs(this); + } else { + InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this); + termDocs.seek(term); + return termDocs; + } + } + + @Override + public TermPositions termPositions() throws IOException { + return new InstantiatedTermPositions(this); + } + + @Override + public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { + InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; + if (doc.getVectorSpace() == null) { + return null; + } + TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()]; + Iterator it = doc.getVectorSpace().keySet().iterator(); + for (int i = 0; i < ret.length; i++) { + ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next()); + } + return ret; + } + + @Override + public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { + InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; + if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) { + return null; + } else { + return new InstantiatedTermPositionVector(doc, field); + } + } + + @Override + public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { + InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; + if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) { + List tv = doc.getVectorSpace().get(field); + mapper.setExpectations(field, tv.size(), true, true); + for (InstantiatedTermDocumentInformation tdi : tv) { + mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions()); + } + } + } + + @Override + public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { + InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber]; + for (Map.Entry> e : doc.getVectorSpace().entrySet()) { + mapper.setExpectations(e.getKey(), e.getValue().size(), true, true); + for (InstantiatedTermDocumentInformation tdi : e.getValue()) { + mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions()); + } + } + } +}