lucene-java-3.4.0/lucene/src/java/org/apache/lucene/search/QueryTermVector.java

   1 package org.apache.lucene.search;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.StringReader;
  22 import java.util.ArrayList;
  23 import java.util.Arrays;
  24 import java.util.HashMap;
  25
  26 import java.util.List;
  27 import java.util.Map;
  28
  29 import org.apache.lucene.analysis.Analyzer;
  30 import org.apache.lucene.analysis.TokenStream;
  31 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  32 import org.apache.lucene.index.TermFreqVector;
  33 import org.apache.lucene.util.ArrayUtil;
  34
  35 /**
  36  *
  37  *
  38  **/
  39 public class QueryTermVector implements TermFreqVector {
  40   private String [] terms = new String[0];
  41   private int [] termFreqs = new int[0];
  42
  43   public String getField() { return null;  }
  44
  45   /**
  46    *
  47    * @param queryTerms The original list of terms from the query, can contain duplicates
  48    */
  49   public QueryTermVector(String [] queryTerms) {
  50
  51     processTerms(queryTerms);
  52   }
  53
  54   public QueryTermVector(String queryString, Analyzer analyzer) {
  55     if (analyzer != null)
  56     {
  57       TokenStream stream;
  58       try {
  59         stream = analyzer.reusableTokenStream("", new StringReader(queryString));
  60       } catch (IOException e1) {
  61         stream = null;
  62       }
  63       if (stream != null)
  64       {
  65         List<String> terms = new ArrayList<String>();
  66         try {
  67           boolean hasMoreTokens = false;
  68
  69           stream.reset();
  70           final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  71
  72           hasMoreTokens = stream.incrementToken();
  73           while (hasMoreTokens) {
  74             terms.add(termAtt.toString());
  75             hasMoreTokens = stream.incrementToken();
  76           }
  77           processTerms(terms.toArray(new String[terms.size()]));
  78         } catch (IOException e) {
  79         }
  80       }
  81     }
  82   }
  83
  84   private void processTerms(String[] queryTerms) {
  85     if (queryTerms != null) {
  86       ArrayUtil.quickSort(queryTerms);
  87       Map<String,Integer> tmpSet = new HashMap<String,Integer>(queryTerms.length);
  88       //filter out duplicates
  89       List<String> tmpList = new ArrayList<String>(queryTerms.length);
  90       List<Integer> tmpFreqs = new ArrayList<Integer>(queryTerms.length);
  91       int j = 0;
  92       for (int i = 0; i < queryTerms.length; i++) {
  93         String term = queryTerms[i];
  94         Integer position = tmpSet.get(term);
  95         if (position == null) {
  96           tmpSet.put(term, Integer.valueOf(j++));
  97           tmpList.add(term);
  98           tmpFreqs.add(Integer.valueOf(1));
  99         }
 100         else {
 101           Integer integer = tmpFreqs.get(position.intValue());
 102           tmpFreqs.set(position.intValue(), Integer.valueOf(integer.intValue() + 1));
 103         }
 104       }
 105       terms = tmpList.toArray(terms);
 106       //termFreqs = (int[])tmpFreqs.toArray(termFreqs);
 107       termFreqs = new int[tmpFreqs.size()];
 108       int i = 0;
 109       for (final Integer integer : tmpFreqs) {
 110         termFreqs[i++] = integer.intValue();
 111       }
 112     }
 113   }
 114
 115   @Override
 116   public final String toString() {
 117         StringBuilder sb = new StringBuilder();
 118         sb.append('{');
 119         for (int i=0; i<terms.length; i++) {
 120             if (i>0) sb.append(", ");
 121             sb.append(terms[i]).append('/').append(termFreqs[i]);
 122         }
 123         sb.append('}');
 124         return sb.toString();
 125     }
 126
 127
 128   public int size() {
 129     return terms.length;
 130   }
 131
 132   public String[] getTerms() {
 133     return terms;
 134   }
 135
 136   public int[] getTermFrequencies() {
 137     return termFreqs;
 138   }
 139
 140   public int indexOf(String term) {
 141     int res = Arrays.binarySearch(terms, term);
 142         return res >= 0 ? res : -1;
 143   }
 144
 145   public int[] indexesOf(String[] terms, int start, int len) {
 146     int res[] = new int[len];
 147
 148     for (int i=0; i < len; i++) {
 149         res[i] = indexOf(terms[i]);
 150     }
 151     return res;
 152   }
 153
 154 }