lucene-java-3.5.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java

   1 package org.apache.lucene.store.instantiated;
   2
   3 import org.apache.lucene.index.TermFreqVector;
   4
   5 import java.io.Serializable;
   6 import java.util.Arrays;
   7 import java.util.List;
   8
   9 /**
  10  * Copyright 2006 The Apache Software Foundation
  11  *
  12  * Licensed under the Apache License, Version 2.0 (the "License");
  13  * you may not use this file except in compliance with the License.
  14  * You may obtain a copy of the License at
  15  *
  16  *     http://www.apache.org/licenses/LICENSE-2.0
  17  *
  18  * Unless required by applicable law or agreed to in writing, software
  19  * distributed under the License is distributed on an "AS IS" BASIS,
  20  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  21  * See the License for the specific language governing permissions and
  22  * limitations under the License.
  23  */
  24
  25 /**
  26  * Vector space view of a document in an {@link InstantiatedIndexReader}.
  27  *
  28  * @see org.apache.lucene.index.TermFreqVector
  29  */
  30 public class InstantiatedTermFreqVector
  31     implements TermFreqVector, Serializable {
  32
  33   private static final long serialVersionUID = 1l;
  34
  35   private final List<InstantiatedTermDocumentInformation> termDocumentInformations;
  36   private final String field;
  37   private final String terms[];
  38   private final int termFrequencies[];
  39
  40   public InstantiatedTermFreqVector(InstantiatedDocument document, String field) {
  41     this.field = field;
  42     termDocumentInformations = document.getVectorSpace().get(field);
  43     terms = new String[termDocumentInformations.size()];
  44     termFrequencies = new int[termDocumentInformations.size()];
  45
  46     for (int i = 0; i < termDocumentInformations.size(); i++) {
  47       InstantiatedTermDocumentInformation termDocumentInformation = termDocumentInformations.get(i);
  48       terms[i] = termDocumentInformation.getTerm().text();
  49       termFrequencies[i] = termDocumentInformation.getTermPositions().length;
  50     }
  51   }
  52
  53   /**
  54    * @return The number of the field this vector is associated with
  55    */
  56   public String getField() {
  57     return field;
  58   }
  59
  60   @Override
  61   public String toString() {
  62     StringBuilder sb = new StringBuilder();
  63     sb.append('{');
  64     sb.append(field).append(": ");
  65     if (terms != null) {
  66       for (int i = 0; i < terms.length; i++) {
  67         if (i > 0) sb.append(", ");
  68         sb.append(terms[i]).append('/').append(termFrequencies[i]);
  69       }
  70     }
  71     sb.append('}');
  72
  73     return sb.toString();
  74   }
  75
  76   public int size() {
  77     return terms == null ? 0 : terms.length;
  78   }
  79
  80   public String[] getTerms() {
  81     return terms;
  82   }
  83
  84   public int[] getTermFrequencies() {
  85     return termFrequencies;
  86   }
  87
  88   public int indexOf(String termText) {
  89     if (terms == null)
  90       return -1;
  91     int res = Arrays.binarySearch(terms, termText);
  92     return res >= 0 ? res : -1;
  93   }
  94
  95   public int[] indexesOf(String[] termNumbers, int start, int len) {
  96     // TODO: there must be a more efficient way of doing this.
  97     //       At least, we could advance the lower bound of the terms array
  98     //       as we find valid indices. Also, it might be possible to leverage
  99     //       this even more by starting in the middle of the termNumbers array
 100     //       and thus dividing the terms array maybe in half with each found index.
 101     int res[] = new int[len];
 102
 103     for (int i = 0; i < len; i++) {
 104       res[i] = indexOf(termNumbers[start + i]);
 105     }
 106     return res;
 107   }
 108
 109   public List<InstantiatedTermDocumentInformation> getTermDocumentInformations() {
 110     return termDocumentInformations;
 111   }
 112
 113 }