lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java

   1 package org.apache.lucene.store.instantiated;
   2
   3 /**
   4  * Copyright 2006 The Apache Software Foundation
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import java.io.IOException;
  20 import java.util.Arrays;
  21 import java.util.Collection;
  22 import java.util.Collections;
  23 import java.util.HashMap;
  24 import java.util.HashSet;
  25 import java.util.Iterator;
  26 import java.util.LinkedList;
  27 import java.util.List;
  28 import java.util.Map;
  29 import java.util.Set;
  30
  31 import org.apache.lucene.document.Document;
  32 import org.apache.lucene.document.FieldSelector;
  33 import org.apache.lucene.index.*;
  34 import org.apache.lucene.store.Directory;
  35 import org.apache.lucene.util.BitVector;
  36
  37 /**
  38  * An InstantiatedIndexReader is not a snapshot in time, it is completely in
  39  * sync with the latest commit to the store!
  40  * <p>
  41  * Consider using InstantiatedIndex as if it was immutable.
  42  */
  43 public class InstantiatedIndexReader extends IndexReader {
  44
  45   private final InstantiatedIndex index;
  46
  47   public InstantiatedIndexReader(InstantiatedIndex index) {
  48     super();
  49     this.index = index;
  50     readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
  51   }
  52
  53   @Deprecated
  54   @Override
  55   public boolean isOptimized() {
  56     return true;
  57   }
  58
  59   /**
  60    * An InstantiatedIndexReader is not a snapshot in time, it is completely in
  61    * sync with the latest commit to the store!
  62    *
  63    * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
  64    */
  65   @Override
  66   public long getVersion() {
  67     return index.getVersion();
  68   }
  69
  70   @Override
  71   public Directory directory() {
  72     throw new UnsupportedOperationException();
  73   }
  74
  75   /**
  76    * An InstantiatedIndexReader is always current!
  77    *
  78    * Check whether this IndexReader is still using the current (i.e., most
  79    * recently committed) version of the index. If a writer has committed any
  80    * changes to the index since this reader was opened, this will return
  81    * <code>false</code>, in which case you must open a new IndexReader in
  82    * order to see the changes. See the description of the <a
  83    * href="IndexWriter.html#autoCommit"><code>autoCommit</code></a> flag
  84    * which controls when the {@link IndexWriter} actually commits changes to the
  85    * index.
  86    *
  87    * @return always true
  88    * @throws CorruptIndexException if the index is corrupt
  89    * @throws IOException if there is a low-level IO error
  90    * @throws UnsupportedOperationException unless overridden in subclass
  91    */
  92   @Override
  93   public boolean isCurrent() throws IOException {
  94     return true;
  95   }
  96
  97   public InstantiatedIndex getIndex() {
  98     return index;
  99   }
 100
 101   private BitVector uncommittedDeletedDocuments;
 102
 103   private Map<String,List<NormUpdate>> uncommittedNormsByFieldNameAndDocumentNumber = null;
 104
 105   private class NormUpdate {
 106     private int doc;
 107     private byte value;
 108
 109     public NormUpdate(int doc, byte value) {
 110       this.doc = doc;
 111       this.value = value;
 112     }
 113   }
 114
 115   @Override
 116   public int numDocs() {
 117     // todo i suppose this value could be cached, but array#length and bitvector#count is fast.
 118     int numDocs = getIndex().getDocumentsByNumber().length;
 119     if (uncommittedDeletedDocuments != null) {
 120       numDocs -= uncommittedDeletedDocuments.count();
 121     }
 122     if (index.getDeletedDocuments() != null) {
 123       numDocs -= index.getDeletedDocuments().count();
 124     }
 125     return numDocs;
 126   }
 127
 128   @Override
 129   public int maxDoc() {
 130     return getIndex().getDocumentsByNumber().length;
 131   }
 132
 133   @Override
 134   public boolean hasDeletions() {
 135     return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null;
 136   }
 137
 138
 139   @Override
 140   public boolean isDeleted(int n) {
 141     return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n))
 142         || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n));
 143   }
 144
 145
 146   @Override
 147   protected void doDelete(int docNum) throws IOException {
 148
 149     // dont delete if already deleted
 150     if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum))
 151         || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) {
 152       return;
 153     }
 154
 155     if (uncommittedDeletedDocuments == null) {
 156       uncommittedDeletedDocuments = new BitVector(maxDoc());
 157     }
 158
 159     uncommittedDeletedDocuments.set(docNum);
 160   }
 161
 162   @Override
 163   protected void doUndeleteAll() throws IOException {
 164     // todo: read/write lock
 165     uncommittedDeletedDocuments = null;
 166     // todo: read/write unlock
 167   }
 168
 169   @Override
 170   protected void doCommit(Map<String,String> commitUserData) throws IOException {
 171     // todo: read/write lock
 172
 173     // 1. update norms
 174     if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
 175       for (Map.Entry<String,List<NormUpdate>> e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) {
 176         byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
 177         for (NormUpdate normUpdate : e.getValue()) {
 178           norms[normUpdate.doc] = normUpdate.value;
 179         }
 180       }
 181       uncommittedNormsByFieldNameAndDocumentNumber = null;
 182     }
 183
 184     // 2. remove deleted documents
 185     if (uncommittedDeletedDocuments != null) {
 186       if (index.getDeletedDocuments() == null) {
 187         index.setDeletedDocuments(uncommittedDeletedDocuments);
 188       } else {
 189         for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) {
 190           if (uncommittedDeletedDocuments.get(d)) {
 191             index.getDeletedDocuments().set(d);
 192           }
 193         }
 194       }
 195       uncommittedDeletedDocuments = null;
 196     }
 197
 198     // todo unlock read/writelock
 199   }
 200
 201   @Override
 202   protected void doClose() throws IOException {
 203     // ignored
 204     // todo perhaps release all associated instances?
 205   }
 206
 207   @Override
 208   public Collection<String> getFieldNames(FieldOption fieldOption) {
 209     Set<String> fieldSet = new HashSet<String>();
 210     for (FieldSetting fi : index.getFieldSettings().values()) {
 211       if (fieldOption == IndexReader.FieldOption.ALL) {
 212         fieldSet.add(fi.fieldName);
 213       } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
 214         fieldSet.add(fi.fieldName);
 215       } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
 216         fieldSet.add(fi.fieldName);
 217       } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
 218         fieldSet.add(fi.fieldName);
 219       } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
 220         fieldSet.add(fi.fieldName);
 221       } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
 222           && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
 223         fieldSet.add(fi.fieldName);
 224       } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
 225         fieldSet.add(fi.fieldName);
 226       } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
 227           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
 228         fieldSet.add(fi.fieldName);
 229       } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
 230           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
 231         fieldSet.add(fi.fieldName);
 232       } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
 233           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
 234         fieldSet.add(fi.fieldName);
 235       }
 236     }
 237     return fieldSet;
 238   }
 239
 240   /**
 241    * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
 242    * position.
 243      <p>
 244    * <b>Warning!</b>
 245    * The resulting document is the actual stored document instance
 246    * and not a deserialized clone as retuned by an IndexReader
 247    * over a {@link org.apache.lucene.store.Directory}.
 248    * I.e., if you need to touch the document, clone it first!
 249    * <p>
 250    * This can also be seen as a feature for live changes of stored values,
 251    * but be careful! Adding a field with an name unknown to the index
 252    * or to a field with previously no stored values will make
 253    * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 254    * out of sync, causing problems for instance when merging the
 255    * instantiated index to another index.
 256      <p>
 257    * This implementation ignores the field selector! All stored fields are always returned!
 258    * <p>
 259    *
 260    * @param n document number
 261    * @param fieldSelector ignored
 262    * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
 263    * @throws CorruptIndexException if the index is corrupt
 264    * @throws IOException if there is a low-level IO error
 265    *
 266    * @see org.apache.lucene.document.Fieldable
 267    * @see org.apache.lucene.document.FieldSelector
 268    * @see org.apache.lucene.document.SetBasedFieldSelector
 269    * @see org.apache.lucene.document.LoadFirstFieldSelector
 270    */
 271   @Override
 272   public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
 273     return document(n);
 274   }
 275
 276   /**
 277    * Returns the stored fields of the <code>n</code><sup>th</sup>
 278    * <code>Document</code> in this index.
 279    * <p>
 280    * <b>Warning!</b>
 281    * The resulting document is the actual stored document instance
 282    * and not a deserialized clone as retuned by an IndexReader
 283    * over a {@link org.apache.lucene.store.Directory}.
 284    * I.e., if you need to touch the document, clone it first!
 285    * <p>
 286    * This can also be seen as a feature for live changes of stored values,
 287    * but be careful! Adding a field with an name unknown to the index
 288    * or to a field with previously no stored values will make
 289    * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 290    * out of sync, causing problems for instance when merging the
 291    * instantiated index to another index.
 292    *
 293    * @throws CorruptIndexException if the index is corrupt
 294    * @throws IOException if there is a low-level IO error
 295    */
 296
 297   @Override
 298   public Document document(int n) throws IOException {
 299     return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
 300   }
 301
 302   /**
 303    * never ever touch these values. it is the true values, unless norms have
 304    * been touched.
 305    */
 306   @Override
 307   public byte[] norms(String field) throws IOException {
 308     byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
 309     if (norms == null) {
 310       return new byte[0]; // todo a static final zero length attribute?
 311     }
 312     if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
 313       norms = norms.clone();
 314       List<NormUpdate> updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
 315       if (updated != null) {
 316         for (NormUpdate normUpdate : updated) {
 317           norms[normUpdate.doc] = normUpdate.value;
 318         }
 319       }
 320     }
 321     return norms;
 322   }
 323
 324   @Override
 325   public void norms(String field, byte[] bytes, int offset) throws IOException {
 326     byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
 327     if (norms == null) {
 328       return;
 329     }
 330     System.arraycopy(norms, 0, bytes, offset, norms.length);
 331   }
 332
 333   @Override
 334   protected void doSetNorm(int doc, String field, byte value) throws IOException {
 335     if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
 336       uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
 337     }
 338     List<NormUpdate> list = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
 339     if (list == null) {
 340       list = new LinkedList<NormUpdate>();
 341       uncommittedNormsByFieldNameAndDocumentNumber.put(field, list);
 342     }
 343     list.add(new NormUpdate(doc, value));
 344   }
 345
 346   @Override
 347   public int docFreq(Term t) throws IOException {
 348     InstantiatedTerm term = getIndex().findTerm(t);
 349     if (term == null) {
 350       return 0;
 351     } else {
 352       return term.getAssociatedDocuments().length;
 353     }
 354   }
 355
 356   @Override
 357   public TermEnum terms() throws IOException {
 358     return new InstantiatedTermEnum(this);
 359   }
 360
 361   @Override
 362   public TermEnum terms(Term t) throws IOException {
 363     InstantiatedTerm it = getIndex().findTerm(t);
 364     if (it != null) {
 365       return new InstantiatedTermEnum(this, it.getTermIndex());
 366     } else {
 367       int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
 368       if (startPos < 0) {
 369         startPos = -1 - startPos;
 370       }
 371       return new InstantiatedTermEnum(this, startPos);
 372     }
 373   }
 374
 375   @Override
 376   public TermDocs termDocs() throws IOException {
 377     return new InstantiatedTermDocs(this);
 378   }
 379
 380
 381   @Override
 382   public TermDocs termDocs(Term term) throws IOException {
 383     if (term == null) {
 384       return new InstantiatedAllTermDocs(this);
 385     } else {
 386       InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
 387       termDocs.seek(term);
 388       return termDocs;
 389     }
 390   }
 391
 392   @Override
 393   public TermPositions termPositions() throws IOException {
 394     return new InstantiatedTermPositions(this);
 395   }
 396
 397   @Override
 398   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
 399     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 400     if (doc.getVectorSpace() == null) {
 401       return null;
 402     }
 403     TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
 404     Iterator<String> it = doc.getVectorSpace().keySet().iterator();
 405     for (int i = 0; i < ret.length; i++) {
 406       ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next());
 407     }
 408     return ret;
 409   }
 410
 411   @Override
 412   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
 413     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 414     if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
 415       return null;
 416     } else {
 417       return new InstantiatedTermPositionVector(doc, field);
 418     }
 419   }
 420
 421   @Override
 422   public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
 423     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 424     if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
 425       List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
 426       mapper.setExpectations(field, tv.size(), true, true);
 427       for (InstantiatedTermDocumentInformation tdi : tv) {
 428         mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
 429       }
 430     }
 431   }
 432
 433   @Override
 434   public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
 435     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 436     for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
 437       mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
 438       for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
 439         mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
 440       }
 441     }
 442   }
 443 }