lucene-java-3.4.0/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java

   1 package org.apache.lucene.index;
   2
   3 import org.apache.lucene.util.StringHelper;
   4
   5 import java.io.IOException;
   6 import java.util.ArrayList;
   7 import java.util.List;
   8 /*
   9  *  Licensed under the Apache License, Version 2.0 (the "License");
  10  *  you may not use this file except in compliance with the License.
  11  *  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *  Unless required by applicable law or agreed to in writing, software
  16  *  distributed under the License is distributed on an "AS IS" BASIS,
  17  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  *  See the License for the specific language governing permissions and
  19  *  limitations under the License.
  20  *
  21  */
  22
  23
  24 /**
  25  * Transparent access to the vector space model,
  26  * either via TermFreqVector or by resolving it from the inverted index.
  27  * <p/>
  28  * Resolving a term vector from a large index can be a time consuming process.
  29  * <p/>
  30  * Warning! This class is not thread safe!
  31  */
  32 public class TermVectorAccessor {
  33
  34   public TermVectorAccessor() {
  35   }
  36
  37   /**
  38    * Instance reused to save garbage collector some time
  39    */
  40   private TermVectorMapperDecorator decoratedMapper = new TermVectorMapperDecorator();
  41
  42
  43   /**
  44    * Visits the TermVectorMapper and populates it with terms available for a given document,
  45    * either via a vector created at index time or by resolving them from the inverted index.
  46    *
  47    * @param indexReader    Index source
  48    * @param documentNumber Source document to access
  49    * @param fieldName      Field to resolve
  50    * @param mapper         Mapper to be mapped with data
  51    * @throws IOException
  52    */
  53   public void accept(IndexReader indexReader, int documentNumber, String fieldName, TermVectorMapper mapper) throws IOException {
  54
  55     fieldName = StringHelper.intern(fieldName);
  56
  57     decoratedMapper.decorated = mapper;
  58     decoratedMapper.termVectorStored = false;
  59
  60     indexReader.getTermFreqVector(documentNumber, fieldName, decoratedMapper);
  61
  62     if (!decoratedMapper.termVectorStored) {
  63       mapper.setDocumentNumber(documentNumber);
  64       build(indexReader, fieldName, mapper, documentNumber);
  65     }
  66   }
  67
  68   /** Instance reused to save garbage collector some time */
  69   private List<String> tokens;
  70
  71   /** Instance reused to save garbage collector some time */
  72   private List<int[]> positions;
  73
  74   /** Instance reused to save garbage collector some time */
  75   private List<Integer> frequencies;
  76
  77
  78   /**
  79    * Populates the mapper with terms available for the given field in a document
  80    * by resolving the inverted index.
  81    *
  82    * @param indexReader
  83    * @param field interned field name
  84    * @param mapper
  85    * @param documentNumber
  86    * @throws IOException
  87    */
  88   private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
  89
  90     if (tokens == null) {
  91       tokens = new ArrayList<String>(500);
  92       positions = new ArrayList<int[]>(500);
  93       frequencies = new ArrayList<Integer>(500);
  94     } else {
  95       tokens.clear();
  96       frequencies.clear();
  97       positions.clear();
  98     }
  99
 100     TermEnum termEnum = indexReader.terms(new Term(field, ""));
 101     if (termEnum.term() != null) {
 102       while (termEnum.term().field() == field) {
 103         TermPositions termPositions = indexReader.termPositions(termEnum.term());
 104         if (termPositions.skipTo(documentNumber)) {
 105
 106           frequencies.add(Integer.valueOf(termPositions.freq()));
 107           tokens.add(termEnum.term().text());
 108
 109
 110           if (!mapper.isIgnoringPositions()) {
 111             int[] positions = new int[termPositions.freq()];
 112             for (int i = 0; i < positions.length; i++) {
 113               positions[i] = termPositions.nextPosition();
 114             }
 115             this.positions.add(positions);
 116           } else {
 117             positions.add(null);
 118           }
 119         }
 120         termPositions.close();
 121         if (!termEnum.next()) {
 122           break;
 123         }
 124       }
 125       mapper.setDocumentNumber(documentNumber);
 126       mapper.setExpectations(field, tokens.size(), false, !mapper.isIgnoringPositions());
 127       for (int i = 0; i < tokens.size(); i++) {
 128         mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
 129       }
 130     }
 131     termEnum.close();
 132
 133
 134   }
 135
 136
 137   private static class TermVectorMapperDecorator extends TermVectorMapper {
 138
 139     private TermVectorMapper decorated;
 140
 141     @Override
 142     public boolean isIgnoringPositions() {
 143       return decorated.isIgnoringPositions();
 144     }
 145
 146     @Override
 147     public boolean isIgnoringOffsets() {
 148       return decorated.isIgnoringOffsets();
 149     }
 150
 151     private boolean termVectorStored = false;
 152
 153     @Override
 154     public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
 155       decorated.setExpectations(field, numTerms, storeOffsets, storePositions);
 156       termVectorStored = true;
 157     }
 158
 159     @Override
 160     public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
 161       decorated.map(term, frequency, offsets, positions);
 162     }
 163
 164     @Override
 165     public void setDocumentNumber(int documentNumber) {
 166       decorated.setDocumentNumber(documentNumber);
 167     }
 168   }
 169
 170 }