lucene-java-3.4.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java

   1 package org.apache.lucene.store.instantiated;
   2
   3 /**
   4  * Copyright 2006 The Apache Software Foundation
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  */
  18
  19 import java.io.IOException;
  20 import java.util.Arrays;
  21 import java.util.Collection;
  22 import java.util.Collections;
  23 import java.util.HashMap;
  24 import java.util.HashSet;
  25 import java.util.Iterator;
  26 import java.util.LinkedList;
  27 import java.util.List;
  28 import java.util.Map;
  29 import java.util.Set;
  30
  31 import org.apache.lucene.document.Document;
  32 import org.apache.lucene.document.FieldSelector;
  33 import org.apache.lucene.index.*;
  34 import org.apache.lucene.store.Directory;
  35 import org.apache.lucene.util.BitVector;
  36
  37 /**
  38  * An InstantiatedIndexReader is not a snapshot in time, it is completely in
  39  * sync with the latest commit to the store!
  40  * <p>
  41  * Consider using InstantiatedIndex as if it was immutable.
  42  */
  43 public class InstantiatedIndexReader extends IndexReader {
  44
  45   private final InstantiatedIndex index;
  46
  47   public InstantiatedIndexReader(InstantiatedIndex index) {
  48     super();
  49     this.index = index;
  50     readerFinishedListeners = Collections.synchronizedSet(new HashSet<ReaderFinishedListener>());
  51   }
  52
  53   /**
  54    * @return always true.
  55    */
  56   @Override
  57   public boolean isOptimized() {
  58     return true;
  59   }
  60
  61   /**
  62    * An InstantiatedIndexReader is not a snapshot in time, it is completely in
  63    * sync with the latest commit to the store!
  64    *
  65    * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
  66    */
  67   @Override
  68   public long getVersion() {
  69     return index.getVersion();
  70   }
  71
  72   @Override
  73   public Directory directory() {
  74     throw new UnsupportedOperationException();
  75   }
  76
  77   /**
  78    * An InstantiatedIndexReader is always current!
  79    *
  80    * Check whether this IndexReader is still using the current (i.e., most
  81    * recently committed) version of the index. If a writer has committed any
  82    * changes to the index since this reader was opened, this will return
  83    * <code>false</code>, in which case you must open a new IndexReader in
  84    * order to see the changes. See the description of the <a
  85    * href="IndexWriter.html#autoCommit"><code>autoCommit</code></a> flag
  86    * which controls when the {@link IndexWriter} actually commits changes to the
  87    * index.
  88    *
  89    * @return always true
  90    * @throws CorruptIndexException if the index is corrupt
  91    * @throws IOException if there is a low-level IO error
  92    * @throws UnsupportedOperationException unless overridden in subclass
  93    */
  94   @Override
  95   public boolean isCurrent() throws IOException {
  96     return true;
  97   }
  98
  99   public InstantiatedIndex getIndex() {
 100     return index;
 101   }
 102
 103   private BitVector uncommittedDeletedDocuments;
 104
 105   private Map<String,List<NormUpdate>> uncommittedNormsByFieldNameAndDocumentNumber = null;
 106
 107   private class NormUpdate {
 108     private int doc;
 109     private byte value;
 110
 111     public NormUpdate(int doc, byte value) {
 112       this.doc = doc;
 113       this.value = value;
 114     }
 115   }
 116
 117   @Override
 118   public int numDocs() {
 119     // todo i suppose this value could be cached, but array#length and bitvector#count is fast.
 120     int numDocs = getIndex().getDocumentsByNumber().length;
 121     if (uncommittedDeletedDocuments != null) {
 122       numDocs -= uncommittedDeletedDocuments.count();
 123     }
 124     if (index.getDeletedDocuments() != null) {
 125       numDocs -= index.getDeletedDocuments().count();
 126     }
 127     return numDocs;
 128   }
 129
 130   @Override
 131   public int maxDoc() {
 132     return getIndex().getDocumentsByNumber().length;
 133   }
 134
 135   @Override
 136   public boolean hasDeletions() {
 137     return index.getDeletedDocuments() != null || uncommittedDeletedDocuments != null;
 138   }
 139
 140
 141   @Override
 142   public boolean isDeleted(int n) {
 143     return (index.getDeletedDocuments() != null && index.getDeletedDocuments().get(n))
 144         || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(n));
 145   }
 146
 147
 148   @Override
 149   protected void doDelete(int docNum) throws IOException {
 150
 151     // dont delete if already deleted
 152     if ((index.getDeletedDocuments() != null && index.getDeletedDocuments().get(docNum))
 153         || (uncommittedDeletedDocuments != null && uncommittedDeletedDocuments.get(docNum))) {
 154       return;
 155     }
 156
 157     if (uncommittedDeletedDocuments == null) {
 158       uncommittedDeletedDocuments = new BitVector(maxDoc());
 159     }
 160
 161     uncommittedDeletedDocuments.set(docNum);
 162   }
 163
 164   @Override
 165   protected void doUndeleteAll() throws IOException {
 166     // todo: read/write lock
 167     uncommittedDeletedDocuments = null;
 168     // todo: read/write unlock
 169   }
 170
 171   @Override
 172   protected void doCommit(Map<String,String> commitUserData) throws IOException {
 173     // todo: read/write lock
 174
 175     // 1. update norms
 176     if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
 177       for (Map.Entry<String,List<NormUpdate>> e : uncommittedNormsByFieldNameAndDocumentNumber.entrySet()) {
 178         byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
 179         for (NormUpdate normUpdate : e.getValue()) {
 180           norms[normUpdate.doc] = normUpdate.value;
 181         }
 182       }
 183       uncommittedNormsByFieldNameAndDocumentNumber = null;
 184     }
 185
 186     // 2. remove deleted documents
 187     if (uncommittedDeletedDocuments != null) {
 188       if (index.getDeletedDocuments() == null) {
 189         index.setDeletedDocuments(uncommittedDeletedDocuments);
 190       } else {
 191         for (int d = 0; d< uncommittedDeletedDocuments.size(); d++) {
 192           if (uncommittedDeletedDocuments.get(d)) {
 193             index.getDeletedDocuments().set(d);
 194           }
 195         }
 196       }
 197       uncommittedDeletedDocuments = null;
 198     }
 199
 200     // todo unlock read/writelock
 201   }
 202
 203   @Override
 204   protected void doClose() throws IOException {
 205     // ignored
 206     // todo perhaps release all associated instances?
 207   }
 208
 209   @Override
 210   public Collection<String> getFieldNames(FieldOption fieldOption) {
 211     Set<String> fieldSet = new HashSet<String>();
 212     for (FieldSetting fi : index.getFieldSettings().values()) {
 213       if (fieldOption == IndexReader.FieldOption.ALL) {
 214         fieldSet.add(fi.fieldName);
 215       } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
 216         fieldSet.add(fi.fieldName);
 217       } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
 218         fieldSet.add(fi.fieldName);
 219       } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
 220         fieldSet.add(fi.fieldName);
 221       } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
 222         fieldSet.add(fi.fieldName);
 223       } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
 224           && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
 225         fieldSet.add(fi.fieldName);
 226       } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
 227         fieldSet.add(fi.fieldName);
 228       } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
 229           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
 230         fieldSet.add(fi.fieldName);
 231       } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
 232           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
 233         fieldSet.add(fi.fieldName);
 234       } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
 235           && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
 236         fieldSet.add(fi.fieldName);
 237       }
 238     }
 239     return fieldSet;
 240   }
 241
 242   /**
 243    * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
 244    * position.
 245      <p>
 246    * <b>Warning!</b>
 247    * The resulting document is the actual stored document instance
 248    * and not a deserialized clone as retuned by an IndexReader
 249    * over a {@link org.apache.lucene.store.Directory}.
 250    * I.e., if you need to touch the document, clone it first!
 251    * <p>
 252    * This can also be seen as a feature for live changes of stored values,
 253    * but be careful! Adding a field with an name unknown to the index
 254    * or to a field with previously no stored values will make
 255    * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 256    * out of sync, causing problems for instance when merging the
 257    * instantiated index to another index.
 258      <p>
 259    * This implementation ignores the field selector! All stored fields are always returned!
 260    * <p>
 261    *
 262    * @param n document number
 263    * @param fieldSelector ignored
 264    * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
 265    * @throws CorruptIndexException if the index is corrupt
 266    * @throws IOException if there is a low-level IO error
 267    *
 268    * @see org.apache.lucene.document.Fieldable
 269    * @see org.apache.lucene.document.FieldSelector
 270    * @see org.apache.lucene.document.SetBasedFieldSelector
 271    * @see org.apache.lucene.document.LoadFirstFieldSelector
 272    */
 273   @Override
 274   public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
 275     return document(n);
 276   }
 277
 278   /**
 279    * Returns the stored fields of the <code>n</code><sup>th</sup>
 280    * <code>Document</code> in this index.
 281    * <p>
 282    * <b>Warning!</b>
 283    * The resulting document is the actual stored document instance
 284    * and not a deserialized clone as retuned by an IndexReader
 285    * over a {@link org.apache.lucene.store.Directory}.
 286    * I.e., if you need to touch the document, clone it first!
 287    * <p>
 288    * This can also be seen as a feature for live changes of stored values,
 289    * but be careful! Adding a field with an name unknown to the index
 290    * or to a field with previously no stored values will make
 291    * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 292    * out of sync, causing problems for instance when merging the
 293    * instantiated index to another index.
 294    *
 295    * @throws CorruptIndexException if the index is corrupt
 296    * @throws IOException if there is a low-level IO error
 297    */
 298
 299   @Override
 300   public Document document(int n) throws IOException {
 301     return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
 302   }
 303
 304   /**
 305    * never ever touch these values. it is the true values, unless norms have
 306    * been touched.
 307    */
 308   @Override
 309   public byte[] norms(String field) throws IOException {
 310     byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
 311     if (norms == null) {
 312       return new byte[0]; // todo a static final zero length attribute?
 313     }
 314     if (uncommittedNormsByFieldNameAndDocumentNumber != null) {
 315       norms = norms.clone();
 316       List<NormUpdate> updated = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
 317       if (updated != null) {
 318         for (NormUpdate normUpdate : updated) {
 319           norms[normUpdate.doc] = normUpdate.value;
 320         }
 321       }
 322     }
 323     return norms;
 324   }
 325
 326   @Override
 327   public void norms(String field, byte[] bytes, int offset) throws IOException {
 328     byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
 329     if (norms == null) {
 330       return;
 331     }
 332     System.arraycopy(norms, 0, bytes, offset, norms.length);
 333   }
 334
 335   @Override
 336   protected void doSetNorm(int doc, String field, byte value) throws IOException {
 337     if (uncommittedNormsByFieldNameAndDocumentNumber == null) {
 338       uncommittedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
 339     }
 340     List<NormUpdate> list = uncommittedNormsByFieldNameAndDocumentNumber.get(field);
 341     if (list == null) {
 342       list = new LinkedList<NormUpdate>();
 343       uncommittedNormsByFieldNameAndDocumentNumber.put(field, list);
 344     }
 345     list.add(new NormUpdate(doc, value));
 346   }
 347
 348   @Override
 349   public int docFreq(Term t) throws IOException {
 350     InstantiatedTerm term = getIndex().findTerm(t);
 351     if (term == null) {
 352       return 0;
 353     } else {
 354       return term.getAssociatedDocuments().length;
 355     }
 356   }
 357
 358   @Override
 359   public TermEnum terms() throws IOException {
 360     return new InstantiatedTermEnum(this);
 361   }
 362
 363   @Override
 364   public TermEnum terms(Term t) throws IOException {
 365     InstantiatedTerm it = getIndex().findTerm(t);
 366     if (it != null) {
 367       return new InstantiatedTermEnum(this, it.getTermIndex());
 368     } else {
 369       int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
 370       if (startPos < 0) {
 371         startPos = -1 - startPos;
 372       }
 373       return new InstantiatedTermEnum(this, startPos);
 374     }
 375   }
 376
 377   @Override
 378   public TermDocs termDocs() throws IOException {
 379     return new InstantiatedTermDocs(this);
 380   }
 381
 382
 383   @Override
 384   public TermDocs termDocs(Term term) throws IOException {
 385     if (term == null) {
 386       return new InstantiatedAllTermDocs(this);
 387     } else {
 388       InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
 389       termDocs.seek(term);
 390       return termDocs;
 391     }
 392   }
 393
 394   @Override
 395   public TermPositions termPositions() throws IOException {
 396     return new InstantiatedTermPositions(this);
 397   }
 398
 399   @Override
 400   public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
 401     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 402     if (doc.getVectorSpace() == null) {
 403       return null;
 404     }
 405     TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
 406     Iterator<String> it = doc.getVectorSpace().keySet().iterator();
 407     for (int i = 0; i < ret.length; i++) {
 408       ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next());
 409     }
 410     return ret;
 411   }
 412
 413   @Override
 414   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
 415     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 416     if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
 417       return null;
 418     } else {
 419       return new InstantiatedTermPositionVector(doc, field);
 420     }
 421   }
 422
 423   @Override
 424   public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
 425     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 426     if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
 427       List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
 428       mapper.setExpectations(field, tv.size(), true, true);
 429       for (InstantiatedTermDocumentInformation tdi : tv) {
 430         mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
 431       }
 432     }
 433   }
 434
 435   @Override
 436   public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
 437     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
 438     for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
 439       mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
 440       for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
 441         mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
 442       }
 443     }
 444   }
 445 }