1 package org.apache.lucene.store.instantiated;
4 * Copyright 2006 The Apache Software Foundation
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import java.io.IOException;
20 import java.util.Arrays;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.HashSet;
25 import java.util.Iterator;
26 import java.util.LinkedList;
27 import java.util.List;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.document.FieldSelector;
33 import org.apache.lucene.index.*;
34 import org.apache.lucene.store.Directory;
35 import org.apache.lucene.util.BitVector;
38 * An InstantiatedIndexReader is not a snapshot in time, it is completely in
39 * sync with the latest commit to the store!
41 * Consider using InstantiatedIndex as if it was immutable.
43 public class InstantiatedIndexReader extends IndexReader {
45 private final InstantiatedIndex index;
// Builds a reader over the supplied InstantiatedIndex.
// NOTE(review): the embedded numbering jumps from 47 to 50, so any statements between the
// signature and the listener set-up (presumably storing `index`) are not visible here - confirm.
47 public InstantiatedIndexReader(InstantiatedIndex index) {
// The listener set is a HashSet wrapped by Collections.synchronizedSet.
50 readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
54 * @return always true.
// NOTE(review): the method body is not visible in this listing (embedded numbering jumps 57 -> 62);
// the doc text preceding this method states the result is always true - confirm the body agrees.
57 public boolean isOptimized() {
62 * An InstantiatedIndexReader is not a snapshot in time, it is completely in
63 * sync with the latest commit to the store!
65 * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
68 public long getVersion() {
69 return index.getVersion();
73 public Directory directory() {
74 throw new UnsupportedOperationException();
78 * An InstantiatedIndexReader is always current!
80 * Check whether this IndexReader is still using the current (i.e., most
81 * recently committed) version of the index. If a writer has committed any
82 * changes to the index since this reader was opened, this will return
83 * <code>false</code>, in which case you must open a new IndexReader in
84 * order to see the changes. See the description of the <a
85 * href="IndexWriter.html#autoCommit"><code>autoCommit</code></a> flag
86 * which controls when the {@link IndexWriter} actually commits changes to the
90 * @throws CorruptIndexException if the index is corrupt
91 * @throws IOException if there is a low-level IO error
92 * @throws UnsupportedOperationException unless overridden in subclass
// NOTE(review): the body is not visible in this listing (embedded numbering jumps 95 -> 99); the doc
// text preceding this method says an InstantiatedIndexReader is always current - confirm the body agrees.
95 public boolean isCurrent() throws IOException {
// NOTE(review): the body is not visible in this listing (embedded numbering jumps 99 -> 103);
// presumably it returns the `index` field - confirm.
99 public InstantiatedIndex getIndex() {
// Deletions requested through doDelete that doCommit has not yet merged into the index;
// reset to null by doCommit and by doUndeleteAll.
103 private BitVector uncommittedDeletedDocuments;
// Norm changes recorded by doSetNorm, keyed by field name, waiting for doCommit to write them
// into the index; null until the first doSetNorm call and again after doCommit.
105 private Map<String,List<NormUpdate>> uncommittedNormsByFieldNameAndDocumentNumber = null;
// Holder for one pending norm change; other code in this file reads its `doc` and `value` members.
// NOTE(review): the field declarations and the constructor body are not visible in this listing
// (embedded numbering jumps 107 -> 111 -> 118) - presumably the constructor stores both arguments; confirm.
107 private class NormUpdate {
111 public NormUpdate(int doc, byte value) {
118 public int numDocs() {
119 // todo i suppose this value could be cached, but array#length and bitvector#count is fast.
120 int numDocs = getIndex().getDocumentsByNumber().length;
121 if (uncommittedDeletedDocuments != null) {
122 numDocs -= uncommittedDeletedDocuments.count();
124 if (index.getDeletedDocuments() != null) {
125 numDocs -= index.getDeletedDocuments().count();
131 public int maxDoc() {
132 return getIndex().getDocumentsByNumber().length;
136 public boolean hasDeletions() {
137 return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null;
142 public boolean isDeleted(int n) {
143 return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n))
144 || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n));
149 protected void doDelete(int docNum) throws IOException {
151 // dont delete if already deleted
152 if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum))
153 || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) {
157 if (uncommittedDeletedDocuments == null) {
158 uncommittedDeletedDocuments = new BitVector(maxDoc());
161 uncommittedDeletedDocuments.set(docNum);
165 protected void doUndeleteAll() throws IOException {
166 // todo: read/write lock
167 uncommittedDeletedDocuments = null;
168 // todo: read/write unlock
// Applies the changes buffered in this reader to the backing index: pending norm updates
// first, then pending deletions. Both buffers are reset to null once applied.
// `commitUserData` is not referenced by any visible line of this method.
// NOTE(review): closing braces and other short lines are missing from this listing.
172 protected void doCommit(Map<String,String> commitUserData) throws IOException {
173 // todo: read/write lock
176 if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
177 for (Map.Entry<String,List<NormUpdate>> e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) {
// NOTE(review): `norms` comes from a map lookup and is indexed below without a visible null
// check - confirm a norms array always exists for every field passed to doSetNorm.
178 byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
179 for (NormUpdate normUpdate : e.getValue()) {
// Writes straight into the index's own norms array.
180 norms[normUpdate.doc] = normUpdate.value;
183 uncommittedNormsByFieldNameAndDocumentNumber = null;
186 // 2. remove deleted documents
187 if (uncommittedDeletedDocuments != null) {
188 if (index.getDeletedDocuments() == null) {
// The index has no deletion vector yet: hand it the pending vector instance itself.
189 index.setDeletedDocuments(uncommittedDeletedDocuments);
// NOTE(review): the line between 189 and 191 is not visible (presumably `} else {`) - confirm.
// Copies every set bit of the pending vector into the index's vector.
191 for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) {
192 if (uncommittedDeletedDocuments.get(d)) {
193 index.getDeletedDocuments().set(d);
197 uncommittedDeletedDocuments = null;
200 // todo unlock read/writelock
// NOTE(review): only a todo comment is visible in this body; none of the visible lines release anything.
204 protected void doClose() throws IOException {
206 // todo perhaps release all associated instances?
210 public Collection<String> getFieldNames(FieldOption fieldOption) {
211 Set<String> fieldSet = new HashSet<String>();
212 for (FieldSetting fi : index.getFieldSettings().values()) {
213 if (fieldOption == IndexReader.FieldOption.ALL) {
214 fieldSet.add(fi.fieldName);
215 } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
216 fieldSet.add(fi.fieldName);
217 } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
218 fieldSet.add(fi.fieldName);
219 } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
220 fieldSet.add(fi.fieldName);
221 } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
222 fieldSet.add(fi.fieldName);
223 } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
224 && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
225 fieldSet.add(fi.fieldName);
226 } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
227 fieldSet.add(fi.fieldName);
228 } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
229 && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
230 fieldSet.add(fi.fieldName);
231 } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
232 && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
233 fieldSet.add(fi.fieldName);
234 } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
235 && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
236 fieldSet.add(fi.fieldName);
243 * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
247 * The resulting document is the actual stored document instance
248 * and not a deserialized clone as returned by an IndexReader
249 * over a {@link org.apache.lucene.store.Directory}.
250 * I.e., if you need to touch the document, clone it first!
252 * This can also be seen as a feature for live changes of stored values,
253 * but be careful! Adding a field with a name unknown to the index
254 * or to a field with previously no stored values will make
255 * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
256 * out of sync, causing problems for instance when merging the
257 * instantiated index to another index.
259 * This implementation ignores the field selector! All stored fields are always returned!
262 * @param n document number
263 * @param fieldSelector ignored
264 * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
265 * @throws CorruptIndexException if the index is corrupt
266 * @throws IOException if there is a low-level IO error
268 * @see org.apache.lucene.document.Fieldable
269 * @see org.apache.lucene.document.FieldSelector
270 * @see org.apache.lucene.document.SetBasedFieldSelector
271 * @see org.apache.lucene.document.LoadFirstFieldSelector
// NOTE(review): the body is not visible in this listing (embedded numbering jumps 274 -> 279). The doc text
// preceding this method says the field selector is ignored and all stored fields are returned - confirm.
274 public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
279 * Returns the stored fields of the <code>n</code><sup>th</sup>
280 * <code>Document</code> in this index.
283 * The resulting document is the actual stored document instance
284 * and not a deserialized clone as returned by an IndexReader
285 * over a {@link org.apache.lucene.store.Directory}.
286 * I.e., if you need to touch the document, clone it first!
288 * This can also be seen as a feature for live changes of stored values,
289 * but be careful! Adding a field with a name unknown to the index
290 * or to a field with previously no stored values will make
291 * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
292 * out of sync, causing problems for instance when merging the
293 * instantiated index to another index.
295 * @throws CorruptIndexException if the index is corrupt
296 * @throws IOException if there is a low-level IO error
300 public Document document(int n) throws IOException {
301 return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
305 * never ever touch these values. it is the true values, unless norms have
// Looks up the stored norms for `field` and, when uncommitted norm updates exist, overlays the
// updates recorded for this field on a clone of the stored array.
// NOTE(review): several short lines are missing from this listing (embedded numbering jumps 310 -> 312,
// 312 -> 314 and 319 -> 327), presumably including the null check that guards the empty-array
// return and the final return statement - confirm.
309 public byte[] norms(String field) throws IOException {
310 byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
312 return new byte[0]; // todo a static final zero length attribute?
314 if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
// Clone first so pending updates are never written into the index's own array.
315 norms = norms.clone();
316 List<NormUpdate> updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
317 if (updated != null) {
// Updates are applied in list order, so a later update for the same document wins.
318 for (NormUpdate normUpdate : updated) {
319 norms[normUpdate.doc] = normUpdate.value;
// Copies the stored norms for `field` into `bytes`, starting at index `offset`.
// NOTE(review): the lines between 328 and 332 are not visible (presumably a null check on `norms`) - confirm.
// NOTE(review): unlike norms(String), the visible lines do not apply uncommitted norm updates -
// confirm this difference is intended.
327 public void norms(String field, byte[] bytes, int offset) throws IOException {
328 byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
332 System.arraycopy(norms, 0, bytes, offset, norms.length);
// Buffers a norm change for (doc, field) in this reader; doCommit later writes buffered changes
// into the index's norms arrays.
// NOTE(review): embedded numbering jumps 340 -> 342 and 343 -> 345, presumably hiding the
// `list == null` check around the list creation - confirm.
336 protected void doSetNorm(int doc, String field, byte value) throws IOException {
337 if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
// Created lazily; the initial capacity is the size of the index's norms-by-field map.
338 uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
340 List<NormUpdate> list = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
342 list = new LinkedList<NormUpdate>();
343 uncommittedNormsByFieldNameAndDocumentNumber.put(field, list);
// Appended at the end, so updates are kept in the order they were made.
345 list.add(new NormUpdate(doc, value));
// Looks the term up in the index and reports the length of its associated-documents array.
// NOTE(review): embedded numbering jumps 350 -> 354; presumably a null check for a term the
// index does not contain sits in between - confirm.
349 public int docFreq(Term t) throws IOException {
350 InstantiatedTerm term = getIndex().findTerm(t);
354 return term.getAssociatedDocuments().length;
359 public TermEnum terms() throws IOException {
360 return new InstantiatedTermEnum(this);
// Creates a term enumeration with a start position derived from `t`. The visible lines show two ways
// of choosing that position: the index of the term returned by findTerm, or the result of a binary
// search over the ordered terms rewritten as -1 - startPos (the insertion point that
// Arrays.binarySearch encodes when the key is not found).
// NOTE(review): the conditions selecting between these paths are on lines missing from this listing
// (embedded numbers 366, 368, 370 and 372) - confirm.
364 public TermEnum terms(Term t) throws IOException {
365 InstantiatedTerm it = getIndex().findTerm(t);
367 return new InstantiatedTermEnum(this, it.getTermIndex());
369 int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
371 startPos = -1 - startPos;
373 return new InstantiatedTermEnum(this, startPos);
378 public TermDocs termDocs() throws IOException {
379 return new InstantiatedTermDocs(this);
// NOTE(review): the conditions and trailing statements of this method are on lines missing from this
// listing (embedded numbering jumps 384 -> 386 -> 388 -> 395). The visible lines return an
// InstantiatedAllTermDocs on one path and construct an InstantiatedTermDocs on another -
// presumably the former handles a null term and the latter is positioned on `term`; confirm.
384 public TermDocs termDocs(Term term) throws IOException {
386 return new InstantiatedAllTermDocs(this);
388 InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
395 public TermPositions termPositions() throws IOException {
396 return new InstantiatedTermPositions(this);
400 public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
401 InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
402 if (doc.getVectorSpace() == null) {
405 TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
406 Iterator<String> it = doc.getVectorSpace().keySet().iterator();
407 for (int i = 0; i < ret.length; i++) {
408 ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next());
414 public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
415 InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
416 if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
419 return new InstantiatedTermPositionVector(doc, field);
424 public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
425 InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
426 if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
427 List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
428 mapper.setExpectations(field, tv.size(), true, true);
429 for (InstantiatedTermDocumentInformation tdi : tv) {
430 mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
436 public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
437 InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
438 for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
439 mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
440 for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
441 mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());