package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.zip.DataFormatException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.CompressionTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;

/**
 * Class responsible for access to stored document fields.
 * <p/>
 * It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
 */
final class FieldsReader implements Cloneable, Closeable {
  private final FieldInfos fieldInfos;

  // The main fieldStream, used only for cloning.
  private final IndexInput cloneableFieldsStream;

  // This is a clone of cloneableFieldsStream used for reading documents.
  // It should not be cloned outside of a synchronized context.
  private final IndexInput fieldsStream;

  private final IndexInput cloneableIndexStream;
  private final IndexInput indexStream;
  private int numTotalDocs;
  private int size;
  private boolean closed;
  private final int format;
  private final int formatSize;

  // The docID offset where our docs begin in the index
  // file.  This will be 0 if we have our own private file.
  private int docStoreOffset;

  private CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
  private boolean isOriginal = false;

  /** Returns a cloned FieldsReader that shares open
   *  IndexInputs with the original one.  It is the caller's
   *  job not to close the original FieldsReader until all
   *  clones are closed (eg, currently SegmentReader manages
   *  this logic). */
  @Override
  public Object clone() {
    ensureOpen();
    return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
  }

  /**
   * Detects the code version this segment was written with. Returns either
   * "2.x" for all pre-3.0 segments, or "3.0" for 3.0 segments. This method
   * should not be called for 3.1+ segments since they already record their code
   * version.
   */
  static String detectCodeVersion(Directory dir, String segment) throws IOException {
    IndexInput idxStream = dir.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_INDEX_EXTENSION), 1024);
    try {
      int format = idxStream.readInt();
      if (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) {
        return "2.x";
      } else {
        return "3.0";
      }
    } finally {
      idxStream.close();
    }
  }

  // Used only by clone
  private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize,
                       int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
    this.fieldInfos = fieldInfos;
    this.numTotalDocs = numTotalDocs;
    this.size = size;
    this.format = format;
    this.formatSize = formatSize;
    this.docStoreOffset = docStoreOffset;
    this.cloneableFieldsStream = cloneableFieldsStream;
    this.cloneableIndexStream = cloneableIndexStream;
    fieldsStream = (IndexInput) cloneableFieldsStream.clone();
    indexStream = (IndexInput) cloneableIndexStream.clone();
  }

  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
  }

  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
    this(d, segment, fn, readBufferSize, -1, 0);
  }
  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
    boolean success = false;
    isOriginal = true;
    try {
      fieldInfos = fn;

      cloneableFieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_EXTENSION), readBufferSize);
      cloneableIndexStream = d.openInput(IndexFileNames.segmentFileName(segment, IndexFileNames.FIELDS_INDEX_EXTENSION), readBufferSize);

      // First version of fdx did not include a format
      // header, but, the first int will always be 0 in that
      // case.
      int firstInt = cloneableIndexStream.readInt();
      if (firstInt == 0)
        format = 0;
      else
        format = firstInt;

      if (format > FieldsWriter.FORMAT_CURRENT)
        throw new CorruptIndexException("Incompatible format version: " + format + " expected "
            + FieldsWriter.FORMAT_CURRENT + " or lower");

      // Later formats store the format as a leading int, which
      // seekIndex must skip over.
      if (format > FieldsWriter.FORMAT)
        formatSize = 4;
      else
        formatSize = 0;

      if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
        cloneableFieldsStream.setModifiedUTF8StringsMode();

      fieldsStream = (IndexInput) cloneableFieldsStream.clone();

      final long indexSize = cloneableIndexStream.length() - formatSize;

      if (docStoreOffset != -1) {
        // We read only a slice out of this shared fields file
        this.docStoreOffset = docStoreOffset;
        this.size = size;

        // Verify the file is long enough to hold all of our
        // docs
        assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
      } else {
        this.docStoreOffset = 0;
        this.size = (int) (indexSize >> 3);
      }

      indexStream = (IndexInput) cloneableIndexStream.clone();
      numTotalDocs = (int) (indexSize >> 3);
      success = true;
    } finally {
      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above. In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        close();
      }
    }
  }

  /**
   * @throws AlreadyClosedException if this FieldsReader is closed
   */
  private void ensureOpen() throws AlreadyClosedException {
    if (closed) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
  }

  /**
   * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a
   * lazy implementation of a Field.  This means that the Fields values will not be accessible.
   *
   * @throws IOException
   */
  public final void close() throws IOException {
    if (!closed) {
      if (isOriginal) {
        IOUtils.close(fieldsStream, indexStream, fieldsStreamTL, cloneableFieldsStream, cloneableIndexStream);
      } else {
        // Clones must not close the shared cloneable streams; the
        // original FieldsReader owns them.
        IOUtils.close(fieldsStream, indexStream, fieldsStreamTL);
      }
      closed = true;
    }
  }

  final int size() {
    return size;
  }
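
  // Each .fdx entry is a single long (8 bytes): the file pointer into
  // .fdt where that document's stored fields begin. Seeking for a docID
  // therefore skips the format header (formatSize) plus 8 bytes per
  // preceding document, shifted by docStoreOffset for shared doc stores.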
  private final void seekIndex(int docID) throws IOException {
    indexStream.seek(formatSize + (docID + docStoreOffset) * 8L);
  }

  boolean canReadRawDocs() {
    // Disable reading raw docs in 2.x format, because of the removal of compressed
    // fields in 3.0. We don't want rawDocs() to decode field bits to figure out
    // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
    // for <3.0 indexes.
    return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
  }
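
  /**
   * Loads the stored fields of document <code>n</code> into a new
   * {@link Document}, consulting the (optional) {@link FieldSelector} per
   * field to decide whether to load it eagerly, lazily, as a size-only
   * placeholder, or to skip it entirely.
   */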
  final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    seekIndex(n);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    out: for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);

      int bits = fieldsStream.readByte() & 0xFF;
      assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_COMPRESSED | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);

      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      assert (compressed ? (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true)
        : "compressed fields are only allowed in indexes of version <= 2.9";
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;

      switch (acceptField) {
        case LOAD:
          addField(doc, fi, binary, compressed, tokenize, numeric);
          break;
        case LOAD_AND_BREAK:
          addField(doc, fi, binary, compressed, tokenize, numeric);
          break out; //Get out of this loop
        case LAZY_LOAD:
          addFieldLazy(doc, fi, binary, compressed, tokenize, true, numeric);
          break;
        case LATENT:
          addFieldLazy(doc, fi, binary, compressed, tokenize, false, numeric);
          break;
        case SIZE:
          skipFieldBytes(binary, compressed, addFieldSize(doc, fi, binary, compressed, numeric));
          break;
        case SIZE_AND_BREAK:
          addFieldSize(doc, fi, binary, compressed, numeric);
          break out; //Get out of this loop
        default:
          skipField(binary, compressed, numeric);
      }
    }

    return doc;
  }

  /** Returns the length in bytes of each raw document in a
   *  contiguous range of length numDocs starting with
   *  startDocID.  Returns the IndexInput (the fieldStream),
   *  already seeked to the starting point for startDocID.*/
  final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
    seekIndex(startDocID);
    long startOffset = indexStream.readLong();
    long lastOffset = startOffset;
    int count = 0;
    while (count < numDocs) {
      final long offset;
      final int docID = docStoreOffset + startDocID + count + 1;
      assert docID <= numTotalDocs;
      if (docID < numTotalDocs)
        offset = indexStream.readLong();
      else
        // The last doc's length is bounded by the end of the fields file.
        offset = fieldsStream.length();
      lengths[count++] = (int) (offset - lastOffset);
      lastOffset = offset;
    }

    fieldsStream.seek(startOffset);

    return fieldsStream;
  }

  /**
   * Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
   * This will have the most payoff on large fields.
   */
  private void skipField(boolean binary, boolean compressed, int numeric) throws IOException {
    final int numBytes;
    switch (numeric) {
      case 0:
        numBytes = fieldsStream.readVInt();
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_INT:
      case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
        numBytes = 4;
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_LONG:
      case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
        numBytes = 8;
        break;
      default:
        throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
    }

    skipFieldBytes(binary, compressed, numBytes);
  }

  private void skipFieldBytes(boolean binary, boolean compressed, int toRead) throws IOException {
    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
      fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
    } else {
      // We need to skip chars. This will slow us down, but still better
      // than actually decoding the field value.
      fieldsStream.skipChars(toRead);
    }
  }
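
  // Numeric fields are stored as fixed-width binary (4 or 8 bytes), so
  // they are decoded directly from the stream rather than as a
  // length-prefixed byte[] or string like other stored fields.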
  private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
    assert numeric != 0;
    switch (numeric) {
      case FieldsWriter.FIELD_IS_NUMERIC_INT:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
      case FieldsWriter.FIELD_IS_NUMERIC_LONG:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
      case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
      case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
      default:
        throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
    }
  }
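
  /**
   * Adds a {@link LazyField} placeholder to the document: only the length
   * and file pointer of the value are read now; the value itself is loaded
   * on first access (and optionally cached, per <code>cacheResult</code>).
   */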
  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
    final AbstractField f;
    if (binary) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed, cacheResult);
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else if (numeric != 0) {
      f = loadNumericField(fi, numeric);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
      Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

      if (compressed) {
        int toRead = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        f = new LazyField(fi.name, store, toRead, pointer, binary, compressed, cacheResult);
        //skip over the part that we aren't loading
        fieldsStream.seek(pointer + toRead);
      } else {
        int length = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        //Skip ahead of where we are by the length of what is stored
        if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
          fieldsStream.seek(pointer + length);
        } else {
          fieldsStream.skipChars(length);
        }
        f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed, cacheResult);
      }
    }

    f.setOmitNorms(fi.omitNorms);
    f.setIndexOptions(fi.indexOptions);
    doc.add(f);
  }
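
  /**
   * Eagerly reads a stored field's value from the fields stream and adds it
   * to the document, decompressing 2.x-era compressed values on the fly.
   */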
  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
    final AbstractField f;

    if (binary) {
      //we have a binary stored field, and it may be compressed
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed) {
        f = new Field(fi.name, uncompress(b));
      } else {
        f = new Field(fi.name, b);
      }
    } else if (numeric != 0) {
      f = loadNumericField(fi, numeric);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
      Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
      if (compressed) {
        int toRead = fieldsStream.readVInt();
        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
            false,                  // name is already interned
            new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
            store,
            index,
            termVector);
      } else {
        f = new Field(fi.name,      // name
            false,                  // name is already interned
            fieldsStream.readString(), // read value
            store,
            index,
            termVector);
      }
    }

    f.setIndexOptions(fi.indexOptions);
    f.setOmitNorms(fi.omitNorms);
    doc.add(f);
  }

  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
  // Read just the size -- caller must skip the field content to continue reading fields
  // Return the size in bytes or chars, depending on field type
  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed, int numeric) throws IOException {
    final int bytesize, size;
    switch (numeric) {
      case 0:
        size = fieldsStream.readVInt();
        bytesize = (binary || compressed) ? size : 2 * size;
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_INT:
      case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
        size = bytesize = 4;
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_LONG:
      case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
        size = bytesize = 8;
        break;
      default:
        throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
    }
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize >>> 24);
    sizebytes[1] = (byte) (bytesize >>> 16);
    sizebytes[2] = (byte) (bytesize >>>  8);
    sizebytes[3] = (byte)  bytesize;
    doc.add(new Field(fi.name, sizebytes));
    return size;
  }

  /**
   * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    private int toRead;
    private long pointer;
    /** @deprecated Only kept for backward-compatibility with <3.0 indexes. Will be removed in 4.0. */
    @Deprecated
    private boolean isCompressed;
    private boolean cacheResult;

    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed, boolean cacheResult) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      this.cacheResult = cacheResult;
      if (isBinary)
        binaryLength = toRead;
      lazy = true;
      this.isCompressed = isCompressed;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed, boolean cacheResult) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      this.cacheResult = cacheResult;
      if (isBinary)
        binaryLength = toRead;
      lazy = true;
      this.isCompressed = isCompressed;
    }
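
    // Each thread gets its own clone of the fields stream, so lazy
    // loading never races with other threads positioned elsewhere in
    // the file.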
    private IndexInput getFieldStream() {
      IndexInput localFieldsStream = fieldsStreamTL.get();
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
        fieldsStreamTL.set(localFieldsStream);
      }
      return localFieldsStream;
    }

    /** The value of the field as a Reader, or null. If null, the String value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    public Reader readerValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a TokenStream, or null. If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    public TokenStream tokenStreamValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a String, or null. If null, the Reader value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    public String stringValue() {
      ensureOpen();
      if (isBinary)
        return null;
      else {
        if (fieldsData == null) {
          String value = null;
          IndexInput localFieldsStream = getFieldStream();
          try {
            localFieldsStream.seek(pointer);
            if (isCompressed) {
              final byte[] b = new byte[toRead];
              localFieldsStream.readBytes(b, 0, b.length);
              value = new String(uncompress(b), "UTF-8");
            } else {
              if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
                byte[] bytes = new byte[toRead];
                localFieldsStream.readBytes(bytes, 0, toRead);
                value = new String(bytes, "UTF-8");
              } else {
                //read in chars b/c we already know the length we need to read
                char[] chars = new char[toRead];
                localFieldsStream.readChars(chars, 0, toRead);
                value = new String(chars);
              }
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }
          if (cacheResult == true) {
            fieldsData = value;
          }
          return value;
        } else {
          return (String) fieldsData;
        }
      }
    }

    public long getPointer() {
      ensureOpen();
      return pointer;
    }

    public void setPointer(long pointer) {
      ensureOpen();
      this.pointer = pointer;
    }

    public int getToRead() {
      ensureOpen();
      return toRead;
    }

    public void setToRead(int toRead) {
      ensureOpen();
      this.toRead = toRead;
    }
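
    /** The value of the field as a byte[], or null for non-binary fields.
     *  The value is read (and uncompressed if necessary) on first access;
     *  <code>result</code> is reused as the read buffer when it is large
     *  enough to hold the value. */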
    public byte[] getBinaryValue(byte[] result) {
      ensureOpen();

      if (isBinary) {
        if (fieldsData == null) {
          // Allocate new buffer if result is null or too small
          final byte[] b;
          if (result == null || result.length < toRead)
            b = new byte[toRead];
          else
            b = result;

          IndexInput localFieldsStream = getFieldStream();

          // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
          // since they are already handling this exception when getting the document
          byte[] value = null;
          try {
            localFieldsStream.seek(pointer);
            localFieldsStream.readBytes(b, 0, toRead);
            if (isCompressed == true) {
              value = uncompress(b);
            } else {
              value = b;
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }

          binaryOffset = 0;
          binaryLength = toRead;
          if (cacheResult == true) {
            fieldsData = value;
          }
          return value;
        } else {
          return (byte[]) fieldsData;
        }
      } else {
        return null;
      }
    }
  }
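
  // 2.x indexes may contain deflate-compressed field values; 3.0 removed
  // the ability to write them, so decompression support is kept only for
  // reading old segments.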
  private byte[] uncompress(byte[] b)
        throws CorruptIndexException {
    try {
      return CompressionTools.decompress(b);
    } catch (DataFormatException e) {
      // this will happen if the field is not compressed
      CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
      newException.initCause(e);
      throw newException;
    }
  }
}