1 package org.apache.lucene.store.instantiated;
4 * Copyright 2006 The Apache Software Foundation
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import java.io.Serializable;
20 import java.util.Comparator;
22 import org.apache.lucene.index.Term;
25 * A term in the inverted index, coupled to the documents it occurs in.
27 * @see org.apache.lucene.index.Term
29 public class InstantiatedTerm
30 implements Serializable {
32 private static final long serialVersionUID = 1l;
34 public static final Comparator<InstantiatedTerm> comparator = new Comparator<InstantiatedTerm>() {
35 public int compare(InstantiatedTerm instantiatedTerm, InstantiatedTerm instantiatedTerm1) {
36 return instantiatedTerm.getTerm().compareTo(instantiatedTerm1.getTerm());
40 public static final Comparator<Object> termComparator = new Comparator<Object>() {
41 public int compare(Object o, Object o1) {
42 return ((InstantiatedTerm)o).getTerm().compareTo((Term)o1);
49 * index of term in InstantiatedIndex
50 * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
51 private int termIndex;
54 * @return Term associated with this entry of the index object graph
56 public Term getTerm() {
60 InstantiatedTerm(String field, String text) {
61 this.term = new Term(field, text);
64 // this could speed up TermDocs.skipTo even more
65 // private Map</** document number*/Integer, /** index in associatedDocuments */Integer> associatedDocumentIndexByDocumentNumber = new HashMap<Integer, Integer>();
67 // public Map</** document number*/Integer, /** index in associatedDocuments */Integer> getAssociatedDocumentIndexByDocumentNumber() {
68 // return associatedDocumentIndexByDocumentNumber;
71 /** Ordered by document number */
72 private InstantiatedTermDocumentInformation[] associatedDocuments;
75 * Meta data per document in which this term is occurring.
76 * Ordered by document number.
78 * @return Meta data per document in which this term is occurring.
80 public InstantiatedTermDocumentInformation[] getAssociatedDocuments() {
81 return associatedDocuments;
86 * Meta data per document in which this term is occurring.
87 * Ordered by document number.
89 * @param associatedDocuments meta data per document in which this term is occurring, ordered by document number
91 void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) {
92 this.associatedDocuments = associatedDocuments;
96 * Finds index to the first beyond the current whose document number is
97 * greater than or equal to <i>target</i>, -1 if there is no such element.
99 * @param target the document number to match
100 * @return -1 if there is no such element
102 public int seekCeilingDocumentInformationIndex(int target) {
103 return seekCeilingDocumentInformationIndex(target, 0, getAssociatedDocuments().length);
107 * Finds index to the first beyond the current whose document number is
108 * greater than or equal to <i>target</i>, -1 if there is no such element.
110 * @param target the document number to match
111 * @param startOffset associated documents index start offset
112 * @return -1 if there is no such element
114 public int seekCeilingDocumentInformationIndex(int target, int startOffset) {
115 return seekCeilingDocumentInformationIndex(target, startOffset, getAssociatedDocuments().length);
119 * Finds index to the first beyond the current whose document number is
120 * greater than or equal to <i>target</i>, -1 if there is no such element.
122 * @param target the document number to match
123 * @param startOffset associated documents index start offset
124 * @param endPosition associated documents index end position
125 * @return -1 if there is no such element
127 public int seekCeilingDocumentInformationIndex(int target, int startOffset, int endPosition) {
129 int pos = binarySearchAssociatedDocuments(target, startOffset, endPosition - startOffset);
131 // int pos = Arrays.binarySearch(getAssociatedDocuments(), target, InstantiatedTermDocumentInformation.doumentNumberIntegerComparator);
136 if (getAssociatedDocuments().length <= pos) {
143 public int binarySearchAssociatedDocuments(int target) {
144 return binarySearchAssociatedDocuments(target, 0);
147 public int binarySearchAssociatedDocuments(int target, int offset) {
148 return binarySearchAssociatedDocuments(target, offset, associatedDocuments.length - offset);
152 * @param target value to search for in the array
153 * @param offset index of the first valid value in the array
154 * @param length number of valid values in the array
155 * @return index of an occurrence of key in array, or -(insertionIndex + 1) if key is not contained in array (<i>insertionIndex</i> is then the index at which key could be inserted).
157 public int binarySearchAssociatedDocuments(int target, int offset, int length) {
159 // implementation originally from http://ochafik.free.fr/blog/?p=106
164 int min = offset, max = offset + length - 1;
165 int minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
166 int maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
169 int nPreviousSteps = 0;
173 // be careful not to compute key - minVal, for there might be an integer overflow.
174 if (target <= minVal) return target == minVal ? min : -1 - min;
175 if (target >= maxVal) return target == maxVal ? max : -2 - max;
180 // A typical binarySearch algorithm uses pivot = (min + max) / 2.
181 // The pivot we use here tries to be smarter and to choose a pivot close to the expectable location of the key.
182 // This reduces dramatically the number of steps needed to get to the key.
183 // However, it does not work well with a logarithmic distribution of values, for instance.
184 // When the key is not found quickly the smart way, we switch to the standard pivot.
185 if (nPreviousSteps > 2) {
186 pivot = (min + max) >> 1;
187 // stop increasing nPreviousSteps from now on
189 // NOTE: We cannot do the following operations in int precision, because there might be overflows.
190 // long operations are slower than float operations with the hardware this was tested on (intel core duo 2, JVM 1.6.0).
191 // Overall, using float proved to be the safest and fastest approach.
192 pivot = min + (int) ((target - (float) minVal) / (maxVal - (float) minVal) * (max - min));
196 int pivotVal = getAssociatedDocuments()[pivot].getDocument().getDocumentNumber();
198 // NOTE: do not store key - pivotVal because of overflows
199 if (target > pivotVal) {
202 } else if (target == pivotVal) {
208 maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
209 minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
215 * Navigates to the view of this occurrences of this term in a specific document.
217 * This method is only used by InstantiatedIndex(IndexReader) and
218 * should not be optimized for less CPU at the cost of more RAM.
220 * @param documentNumber the n:th document in the index
221 * @return view of this term from specified document
223 public InstantiatedTermDocumentInformation getAssociatedDocument(int documentNumber) {
224 int pos = binarySearchAssociatedDocuments(documentNumber);
225 return pos < 0 ? null : getAssociatedDocuments()[pos];
228 public final String field() {
232 public final String text() {
237 public String toString() {
238 return term.toString();
242 public int getTermIndex() {
246 public void setTermIndex(int termIndex) {
247 this.termIndex = termIndex;